#include "llvm/IR/IntrinsicsPowerPC.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisableILPPref(
    "disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> EnableQuadwordAtomics(
    "ppc-quadword-atomics",
    cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
  initializeAddrModeMap();

  bool isPPC64 = Subtarget.isPPC64();

  if (!Subtarget.hasEFPU2())
  if (Subtarget.isISA3_0()) {
  if (!Subtarget.hasSPE()) {
  for (MVT VT : ScalarIntVTs) {
  if (Subtarget.useCRBits()) {
    if (isPPC64 || Subtarget.hasFPCVT()) {
  if (Subtarget.isISA3_0()) {
  if (!Subtarget.hasSPE()) {
  if (Subtarget.hasVSX()) {
  if (Subtarget.hasFSQRT()) {
  if (Subtarget.hasFPRND()) {
  if (Subtarget.hasSPE()) {
  if (Subtarget.hasSPE())
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
  if (Subtarget.hasFCPSGN()) {
  if (Subtarget.hasFPRND()) {
  if (Subtarget.isISA3_1()) {
  if (Subtarget.isISA3_0()) {
  if (!Subtarget.useCRBits()) {
  if (!Subtarget.useCRBits())
  if (Subtarget.hasFPU()) {
  if (!Subtarget.useCRBits())
  if (Subtarget.hasSPE()) {
  if (Subtarget.hasDirectMove() && isPPC64) {
  if (TM.Options.UnsafeFPMath) {
  if (Subtarget.hasSPE()) {
  if (Subtarget.has64BitSupport()) {
  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
  if (Subtarget.hasSPE()) {
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
  if (Subtarget.use64BitRegs()) {
  if (Subtarget.has64BitSupport()) {
  if (Subtarget.hasVSX()) {
  if (Subtarget.hasAltivec()) {
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
    if (!Subtarget.hasP8Vector()) {
    if (Subtarget.hasAltivec())
    if (Subtarget.hasP8Altivec())
    if (Subtarget.hasVSX()) {
    if (Subtarget.hasP8Altivec())
    if (Subtarget.isISA3_1()) {
    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      if (Subtarget.hasDirectMove() && isPPC64) {
      if (TM.Options.UnsafeFPMath) {
      if (Subtarget.hasP8Vector())
      if (Subtarget.hasP8Altivec()) {
      if (Subtarget.isISA3_1())
      if (Subtarget.hasP8Altivec()) {
      if (Subtarget.hasP9Vector()) {
      } else if (Subtarget.hasVSX()) {
      if (Subtarget.hasP9Altivec()) {
        if (Subtarget.isISA3_1()) {
    if (Subtarget.hasP10Vector()) {
  if (Subtarget.pairedVectorMemops()) {
  if (Subtarget.hasMMA()) {
    if (Subtarget.isISAFuture())
  if (Subtarget.has64BitSupport())
  if (Subtarget.isISA3_1())
  if (Subtarget.hasAltivec()) {
  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())
  if (Subtarget.useCRBits()) {
  if (Subtarget.hasP9Altivec()) {
  if (Subtarget.useCRBits()) {
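// initializeAddrModeMap populates the table mapping PPC addressing modes
// (D-form, DS-form, DQ-form, X-form) to the flag sets the selector checks
// when matching memory accesses; the getMaxByValAlign-style helper below
// computes the largest alignment a by-value aggregate needs, capped by
// MaxMaxAlign (raised to 32 only when 256-bit alignment is allowed).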
void PPCTargetLowering::initializeAddrModeMap() {

  if (MaxAlign == MaxMaxAlign)
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)

  if (Subtarget.hasAltivec())
  return Alignment.value();

  return Subtarget.hasSPE();
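// getTargetNodeName maps each PPCISD opcode to a printable name for DAG
// dumps; the cases that survive in this excerpt cover the rounding-mode call
// variants (*_RM), the strict floating-point nodes (STRICT_*), and assorted
// vector and TLS nodes.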
    return "PPCISD::FP_TO_UINT_IN_VSR";
    return "PPCISD::FP_TO_SINT_IN_VSR";
    return "PPCISD::FTSQRT";
    return "PPCISD::FSQRT";
    return "PPCISD::XXSPLTI_SP_TO_DP";
    return "PPCISD::XXSPLTI32DX";
    return "PPCISD::XXPERM";
    return "PPCISD::CALL_RM";
    return "PPCISD::CALL_NOP_RM";
    return "PPCISD::CALL_NOTOC_RM";
    return "PPCISD::BCTRL_RM";
    return "PPCISD::BCTRL_LOAD_TOC_RM";
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
    return "PPCISD::ANDI_rec_1_EQ_BIT";
    return "PPCISD::ANDI_rec_1_GT_BIT";
    return "PPCISD::ST_VSR_SCAL_INT";
    return "PPCISD::PADDI_DTPREL";
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
    return "PPCISD::STRICT_FADDRTZ";
    return "PPCISD::STRICT_FCTIDZ";
    return "PPCISD::STRICT_FCTIWZ";
    return "PPCISD::STRICT_FCTIDUZ";
    return "PPCISD::STRICT_FCTIWUZ";
    return "PPCISD::STRICT_FCFID";
    return "PPCISD::STRICT_FCFIDU";
    return "PPCISD::STRICT_FCFIDS";
    return "PPCISD::STRICT_FCFIDUS";
    return "PPCISD::STORE_COND";
    return CFP->getValueAPF().isZero();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
    return CFP->getValueAPF().isZero();

  return Op < 0 || Op == Val;
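// The predicates below decide whether a VECTOR_SHUFFLE mask matches the
// vpkuhum/vpkuwum/vpkudum pack instructions. ShuffleKind distinguishes the
// operand situation: 0 = big-endian with two different inputs, 1 = unary
// (both operands identical, either endianness), 2 = little-endian with two
// different inputs. For example, a unary vpkuhum keeps one byte out of each
// halfword, so the 16-entry mask steps by two through a single input.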
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)

  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
  if (!Subtarget.hasP8Vector())

  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
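// isVMerge checks a shuffle mask against the vmrgh*/vmrgl* merge pattern for
// a given unit size (1, 2, or 4 bytes): unit i of the result alternates
// between LHSStart+i and RHSStart+i. The isVMRGL/isVMRGH wrappers whose
// branches follow pick the start offsets according to endianness and
// ShuffleKind.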
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)
    for (unsigned j = 0; j != UnitSize; ++j) {
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))

  if (ShuffleKind == 1)
  else if (ShuffleKind == 2)

  if (ShuffleKind == 1)
  else if (ShuffleKind == 0)

  if (ShuffleKind == 1)
  else if (ShuffleKind == 2)

  if (ShuffleKind == 1)
  else if (ShuffleKind == 0)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))

    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 2)

    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 0)
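// isVSLDOIShuffleMask recognizes masks that are a left rotation of the
// concatenated inputs by a whole number of bytes, as performed by vsldoi.
// It returns the shift amount, or -1 if the mask does not match; on
// little-endian the amount is reversed (16 - ShiftAmt) at the end.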
  if (i == 16) return -1;

  if (ShiftAmt < i) return -1;

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    for (++i; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    for (++i; i != 16; ++i)

    ShiftAmt = 16 - ShiftAmt;
  EVT VT = N->getValueType(0);
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);

  assert(EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  if (N->getMaskElt(0) % EltSize != 0)

  unsigned ElementBase = N->getMaskElt(0);

  if (ElementBase >= 16)

  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
2204 "Unexpected element width.");
2205 assert((StepLen == 1 || StepLen == -1) &&
"Unexpected element width.");
2207 unsigned NumOfElem = 16 /
Width;
2208 unsigned MaskVal[16];
2209 for (
unsigned i = 0;
i < NumOfElem; ++
i) {
2210 MaskVal[0] =
N->getMaskElt(
i *
Width);
2211 if ((StepLen == 1) && (MaskVal[0] %
Width)) {
2213 }
else if ((StepLen == -1) && ((MaskVal[0] + 1) %
Width)) {
2217 for (
unsigned int j = 1;
j <
Width; ++
j) {
2218 MaskVal[
j] =
N->getMaskElt(
i *
Width +
j);
2219 if (MaskVal[
j] != MaskVal[
j-1] + StepLen) {
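// isXXINSERTWMask matches masks where one 32-bit word of one input is
// inserted into the other (the xxinsertw/vinsertw pattern). M0..M3 are the
// mask elements in word units; exactly one may come from the other vector,
// and the LittleEndianShifts/BigEndianShifts tables translate that word's
// index into the shift (ShiftElts) and byte position (InsertAtByte) the
// instruction needs.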
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;

  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;

  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;

  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;

  if (N->getOperand(1).isUndef()) {
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;

  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)

  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
    ShiftElts = (8 - M0) % 8;
  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
    ShiftElts = (4 - M0) % 4;

  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
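// isXXPERMDIShuffleMask handles doubleword permutes: M0 and M1 select which
// doubleword of the (possibly swapped) inputs lands in each half of the
// result, and DM becomes the two-bit immediate for xxpermdi. On
// little-endian the element numbering is inverted, hence the (~M) arithmetic
// below.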
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);

  if (M0 > 1 && M1 < 2) {
  } else if (M0 < 2 && M1 > 1) {

  DM = (((~M1) & 1) << 1) + ((~M0) & 1);

  if (M0 < 2 && M1 > 1) {
  } else if (M0 > 1 && M1 < 2) {

  DM = (M0 << 1) + (M1 & 1);
  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);

  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;

      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;

      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();

      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;

      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  unsigned ValSizeInBytes = EltSize;
    Value = CN->getZExtValue();
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());

  if (ValSizeInBytes < ByteSize) return SDValue();

  if (MaskVal == 0) return SDValue();

  if (SignExtend32<5>(MaskVal) == MaskVal)
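// The helpers below test whether an SDNode is a constant fitting a signed
// 16-bit immediate (isIntS16Immediate) or the 34-bit immediate of prefixed
// D-form instructions (isIntS34Immediate), and implement the reg+reg and
// reg+imm operand selection used by the PPC address-matching code.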
  if (!isa<ConstantSDNode>(N))

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();

  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);

    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
    Base = N.getOperand(0);

  if (!isa<ConstantSDNode>(N))

  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return isInt<34>(Imm);

        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
    if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
      Base = N.getOperand(0);

        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Base = N.getOperand(0);
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);
      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
          dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = N.getOperand(0);
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
                             CN->getValueType(0));
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

  Base = N.getOperand(0);

  Base = N.getOperand(0);

       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);

  Ty *PCRelCand = dyn_cast<Ty>(N);

  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
  EVT MemVT = LD->getMemoryVT();

    if (!ST.hasP8Vector())
    if (!ST.hasP9Vector())

    if (UI.getUse().get().getResNo() == 0 &&

    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();

  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
    SDValue Val = cast<StoreSDNode>(N)->getValue();

  if (Alignment < Align(4))
      isa<ConstantSDNode>(Offset))
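// getLabelAccessInfo computes the target flags for the high and low halves
// of an address (HA/LO pairs), and the Lower*Address routines that follow
// materialize constant-pool, jump-table, block-address, and TLS addresses
// either through a TOC entry (64-bit ELF and AIX) or through a Hi/Lo pair
// (32-bit, PIC or static).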
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,

  const bool Is64Bit = Subtarget.isPPC64();

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(CP), GA);

  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, SDLoc(CP), GA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(JT), GA);

  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, SDLoc(GA), GA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(BASDN), GA);

  unsigned MOHiFlag, MOLoFlag;

    return LowerGlobalTLSAddressAIX(Op, DAG);
  return LowerGlobalTLSAddressLinux(Op, DAG);

  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);

  bool is64bit = Subtarget.isPPC64();

  if (!TM.isPositionIndependent())

                           PtrVT, GOTPtr, TGA, TGA);
                           PtrVT, TLSAddr, TGA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, DL, GA);

  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, DL, GA);

  bool IsStrict = Op->isStrictFPOpcode();
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();

  EVT LHSVT = LHS.getValueType();

    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");

    assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
  int ShuffV[] = {1, 0, 3, 2};

    if (C->isAllOnes() || C->isZero())

  EVT VT = Op.getValueType();

  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();

  InChain = OverflowArea.getValue(1);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,

  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  return Op.getOperand(0);

         "Expecting Inline ASM node.");

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)

    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();

      for (; NumVals; --NumVals, ++i) {
        if (Reg != PPC::LR && Reg != PPC::LR8)

  bool isPPC64 = (PtrVT == MVT::i64);

  TargetLowering::ArgListEntry Entry;
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;

    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  return DAG.getStore(thirdStore, dl, FR, nextPtr,

static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
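// Calling-convention helpers: CalculateStackSlotSize and
// CalculateStackSlotAlignment compute the parameter-save-area slot for one
// argument, and CalculateStackSlotUsed additionally tracks whether the
// argument spills to memory given the remaining FPR/VR counts. These feed
// both LowerFormalArguments and LowerCall on the 64-bit ELF ABIs.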
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                         ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);

    Alignment = Align(16);

  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign.value() % PtrByteSize != 0)
        report_fatal_error(
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;

  if (Flags.isInConsecutiveRegs()) {

static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  ArgOffset = alignTo(ArgOffset, Alignment);
  if (ArgOffset >= LinkageSize + ParamAreaSize)

  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  if (ArgOffset > LinkageSize + ParamAreaSize)

  if (!Flags.isByVal()) {
    if (AvailableFPRs > 0) {
    if (AvailableVRs > 0) {

static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
SDValue PPCTargetLowering::LowerFormalArguments(
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(

  const Align PtrAlign(4);

  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {

        RC = &PPC::GPRCRegClass;
        if (Subtarget.hasP8Vector())
          RC = &PPC::VSSRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
        else
          RC = &PPC::F4RCRegClass;
        if (Subtarget.hasVSX())
          RC = &PPC::VSFRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
        else
          RC = &PPC::F8RCRegClass;
        RC = &PPC::VRRCRegClass;
        RC = &PPC::VRRCRegClass;
        RC = &PPC::VRRCRegClass;

          assert(i + 1 < e && "No second half of double precision argument");

    InVals.push_back(ArgValue);

      ArgOffset += ArgSize - ObjSize;

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

        PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

        PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                                       CCInfo.getNextStackOffset(), true));

    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
      MemOps.push_back(Store);

    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
      MemOps.push_back(Store);

  if (!MemOps.empty())
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  else if (Flags.isZExt())

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
         "fastcc not supported on varargs functions");

  unsigned PtrByteSize = 8;

  static const MCPhysReg GPR[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  static const MCPhysReg VR[] = {PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8,
                                 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_VR_Regs = std::size(VR);

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ArgSize = ObjSize;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();

    unsigned CurArgOffset;
    auto ComputeArgOffset = [&]() {
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;

      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

        InVals.push_back(FIN);

      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)

      if (ObjSize < PtrByteSize) {
        if (!isLittleEndian) {
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          MemOps.push_back(Store);

        ArgOffset += PtrByteSize;

      InVals.push_back(FIN);

      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)

          unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
          MemOps.push_back(Store);

      ArgOffset += ArgSize;

      if (Flags.isNest()) {

          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

      if (GPR_idx != Num_GPR_Regs) {
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ArgSize = PtrByteSize;

      if (FPR_idx != Num_FPR_Regs) {
                        Subtarget.hasP8Vector()
                            ? &PPC::VSSRCRegClass
                            : &PPC::F4RCRegClass);
                        Subtarget.hasVSX() ? &PPC::VSFRCRegClass
                                           : &PPC::F8RCRegClass);

        if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))

      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
      ArgOffset += ArgSize;
      if (Flags.isInConsecutiveRegsLast())
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

      if (VR_idx != Num_VR_Regs) {

      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;

      InVals.push_back(ArgVal);

  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

    int Depth = ArgOffset;

    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      MemOps.push_back(Store);

  if (!MemOps.empty())
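// Tail-call support: CalculateTailCallSPDiff computes how much the stack
// must grow (a negative delta) so the callee's parameter area fits inside
// the caller's, callsShareTOCBase decides whether the callee can reuse the
// caller's TOC pointer, and the IsEligibleForTailCallOptimization* routines
// below gate sibling-call and tail-call generation.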
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {
  if (!isTailCall) return 0;

  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;

  if (SPDiff < FI->getTailCallSPDelta())

         "PC Relative callers do not have a TOC and cannot share a TOC Base");

  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))

  const Function *F = dyn_cast<Function>(GV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
      F = dyn_cast<Function>(GlobalObj);

  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||

  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())

  const unsigned PtrByteSize = 8;

  static const MCPhysReg GPR[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  static const MCPhysReg VR[] = {PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8,
                                 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = 13;
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))

  auto CalleeArgEnd = CB.arg_end();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)

        isa<UndefValue>(CalleeArg))

  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))

bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
  if (DisableSCO && !TailCallOpt) return false;

  if (isVarArg) return false;

  if (Caller.getCallingConv() != CalleeCC &&

PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
  for (unsigned i = 0; i != Ins.size(); i++) {
    if (Flags.isByVal()) return false;

    return G->getGlobal()->hasHiddenVisibility()
        || G->getGlobal()->hasProtectedVisibility();

  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||

                         (int)C->getZExtValue() >> 2, SDLoc(Op),

struct TailCallArgumentInfo {
  TailCallArgumentInfo() = default;
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,

static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  bool isPPC64 = Subtarget.isPPC64();
  int SlotSize = isPPC64 ? 8 : 4;
  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
                                                          NewRetAddrLoc, true);
  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,

  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;

  TailCallArgumentInfo Info;
  Info.FrameIdxOp = FIN;
  TailCallArguments.push_back(Info);

SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    LROpOut = getReturnAddrFrameIndex(DAG);

  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                       Flags.getNonZeroByValAlign(), false, false, false,

                             SDValue PtrOff, int SPDiff, unsigned ArgOffset,
                             bool isPPC64,
    MemOpChains.push_back(

                                   const SDLoc &dl, int SPDiff,
                                   unsigned NumBytes, SDValue LROp,
  if (!MemOpChains2.empty())

    return G->getGlobal()->getValueType()->isFunctionTy();
SDValue PPCTargetLowering::LowerCallResult(
  CCRetInfo.AnalyzeCallResult(
      Ins, (CallConv == CallingConv::Fast) ? RetCC_PPC_Cold : RetCC_PPC);

  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);

    InVals.push_back(Val);

                              bool IsStrictFPCall = false) {
  unsigned RetOpc = 0;
  if (IsStrictFPCall) {

  auto isLocalCallee = [&]() {
           !isa_and_nonnull<GlobalIFunc>(GV);

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
    return getAIXFuncEntryPointSymbolSDNode(GV);

    const char *SymName = S->getSymbol();
      return getAIXFuncEntryPointSymbolSDNode(F);

    const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
    SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();

         "Expected a CALLSEQ_STARTSDNode.");

  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                               Alignment, MMOFlags);

      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,

         "Nest parameter is not supported on AIX.");

    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
  const bool IsPPC64 = Subtarget.isPPC64();

  Ops.push_back(Chain);

    Ops.push_back(AddTOC);

    Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
                                  RegsToPass[i].second.getValueType()));

  assert(Mask && "Missing call preserved mask for calling convention");

    Ops.push_back(Glue);
SDValue PPCTargetLowering::FinishCall(
  if (!CFlags.IsIndirect)
                                  dl, CFlags.HasNest, Subtarget);

  if (CFlags.IsTailCall) {
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            isa<ConstantSDNode>(Callee) ||
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "Unexpected call opcode for a tail call.");

  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,

      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,

              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");
                 << "\nTCO callee: ");

         "site marked musttail");

  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
    Callee = LowerGlobalAddress(Callee, DAG);

      CallConv, isTailCall, isVarArg, isPatchPoint,

    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
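// LowerCall_32SVR4 follows the 32-bit SVR4 ABI: operands are assigned by
// CCState (with a separate pass for by-value arguments), f64 is split
// across two GPRs when SPE is in use, and the collected register copies
// plus memory stores are glued ahead of the final call node built by
// FinishCall.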
SDValue PPCTargetLowering::LowerCall_32SVR4(
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  const Align PtrAlign(4);

    CCInfo.PreAnalyzeCallOperands(Outs);

    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      if (Outs[i].IsFixed) {

        errs() << "Call operand #" << i << " has unhandled type "

    CCInfo.clearWasPPCF128();

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  unsigned NumBytes = CCByValInfo.getNextStackOffset();

    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  bool seenFloatArg = false;

  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {

    if (Flags.isByVal()) {
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");

      Chain = CallSeqStart = NewCallSeqStart;

      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
          RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

      MemOpChains.push_back(

  if (!MemOpChains.empty())

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);

    SDValue Ops[] = { Chain, InFlag };

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);

SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
  return NewCallSeqStart;
SDValue PPCTargetLowering::LowerCall_64SVR4(
  unsigned NumOps = Outs.size();

  bool IsSibCall = false;

  unsigned PtrByteSize = 8;

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  static const MCPhysReg VR[] = {PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8,
                                 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  const unsigned NumGPRs = std::size(GPR);

  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;

  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

    HasParameterArea = false;

  for (unsigned i = 0; i != NumOps; ++i) {
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isByVal()) {
      NumGPRsUsed += (Flags.getByValSize()+7)/8;
      if (NumGPRsUsed > NumGPRs)
        HasParameterArea = true;

        if (++NumGPRsUsed <= NumGPRs)
        if (++NumVRsUsed <= NumVRs)
        if (++NumVRsUsed <= NumVRs)
        if (++NumFPRsUsed <= NumFPRs)

        HasParameterArea = true;

    NumBytes = alignTo(NumBytes, Alignement);

    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  unsigned NumBytesActuallyUsed = NumBytes;

  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;

  if (CFlags.IsTailCall)

    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  unsigned ArgOffset = LinkageSize;

  for (unsigned i = 0; i != NumOps; ++i) {
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    auto ComputePtrOff = [&]() {
      ArgOffset = alignTo(ArgOffset, Alignment);

      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);

    if (Flags.isByVal()) {
      if (Size==1 || Size==2 || Size==4) {
        if (GPR_idx != NumGPRs) {
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;

      if (GPR_idx == NumGPRs && Size < 8) {
        if (!isLittleEndian) {
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

        ArgOffset += PtrByteSize;

      if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

      if (Size < 8 && GPR_idx != NumGPRs) {
        if (!isLittleEndian) {
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        ArgOffset += PtrByteSize;

      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        if (GPR_idx != NumGPRs) {
          unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

    switch (Arg.getSimpleValueType().SimpleTy) {
      if (Flags.isNest()) {
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));

      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
          ArgOffset += PtrByteSize;
      ArgOffset += PtrByteSize;

      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      if (!NeedGPROrStack)
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        } else if (!Flags.isInConsecutiveRegs()) {
        } else if (ArgOffset % PtrByteSize != 0) {
          if (!isLittleEndian)
        } else if (Flags.isInConsecutiveRegsLast()) {
          if (!isLittleEndian)

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));

            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

      if (!IsFastCall || NeededLoad) {
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())

  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail calls not supported");

  if (isELFv2ABI && !CFlags.IsPatchPoint)
    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);

  if (CFlags.IsTailCall && !IsSibCall)

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
6563 "Required alignment greater than stack alignment.");
6583 return RequiredAlign <= 8;
6588 return RequiredAlign <= 4;
6598 const bool IsPPC64 = Subtarget.
isPPC64();
6610 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6612 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6613 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6617 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6618 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6623 "register width are not supported.");
6629 if (ByValSize == 0) {
6636 const unsigned StackSize =
alignTo(ByValSize, PtrAlign);
6658 assert(IsPPC64 &&
"PPC32 should have split i64 values.");
6665 LocInfo = ArgFlags.
isSExt() ? CCValAssign::LocInfo::SExt
6666 : CCValAssign::LocInfo::ZExt;
6687 for (
unsigned I = 0;
I < StoreSize;
I += PtrAlign.
value()) {
6688 if (
unsigned Reg = State.
AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6689 assert(FReg &&
"An FPR should be available when a GPR is reserved.");
6740 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6746 while (NextRegIndex != GPRs.
size() &&
6751 assert(
Reg &&
"Allocating register unexpectedly failed.");
6764 for (
unsigned I = 0;
I !=
VecSize;
I += PtrSize)
6776 if (NextRegIndex == GPRs.
size()) {
6785 if (GPRs[NextRegIndex] == PPC::R9) {
6790 const unsigned FirstReg = State.
AllocateReg(PPC::R9);
6791 const unsigned SecondReg = State.
AllocateReg(PPC::R10);
6792 assert(FirstReg && SecondReg &&
6793 "Allocating R9 or R10 unexpectedly failed.");
6807 for (
unsigned I = 0;
I !=
VecSize;
I += PtrSize) {
6809 assert(
Reg &&
"Failed to allocated register for vararg vector argument");
6825 "i64 should have been split for 32-bit codegen.");
6833 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6835 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6837 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6845 return &PPC::VRRCRegClass;
6858 else if (Flags.isZExt())
6870 "Reg must be a valid argument register!");
6871 return LASize + 4 * (
Reg - PPC::R3);
6876 "Reg must be a valid argument register!");
6877 return LASize + 8 * (
Reg - PPC::X3);
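// The AIX ABI path: CC_AIX above assigns locations, and
// LowerFormalArguments_AIX below materializes incoming arguments, including
// the custom handling for vararg vectors (one MemLoc plus two or four GPR
// RegLocs covering the same value) and for by-value aggregates spread
// across GPRs and the parameter save area.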
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
         "Unexpected calling convention!");

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  for (size_t I = 0, End = ArgLocs.size(); I != End; ) {

    auto HandleMemLoc = [&]() {
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      const bool IsImmutable =
      InVals.push_back(ArgValue);

      assert(isVarArg && "Only use custom memloc for vararg.");
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
               "Unexpected Val type for custom RegLoc.");
               "ValNo mismatch between custom MemLoc and RegLoc.");
                                   Subtarget.hasVSX()));

      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

      const unsigned Size =
      InVals.push_back(FIN);

    if (Flags.isByVal()) {
      if (Flags.getNonZeroByValAlign() > PtrByteSize)

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      InVals.push_back(FIN);

          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
            CopyFrom.getValue(1), dl, CopyFrom,
        MemOps.push_back(Store);

      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
               "RegLocs should be for ByVal argument.");

      if (Offset != StackSize) {
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");

                                 Subtarget.hasVSX()));
    InVals.push_back(ArgValue);

  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

  CallerReservedArea =

                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};
    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);

    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
      MemOps.push_back(Store);

  if (!MemOps.empty())
SDValue PPCTargetLowering::LowerCall_AIX(
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)

  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                     CCInfo.getNextStackOffset());

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {

      unsigned LoadOffset = 0;

      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));

      if (LoadOffset == ByValSize)

      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            CallSeqStart, MemcpyFlags, DAG, dl);

      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");

      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        MemOpChains.push_back(Load.getValue(1));
               "Unexpected load emitted during handling of pass-by-value "

        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,

      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");

      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();

      unsigned LoadOffset = 0;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;

      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

      MemOpChains.push_back(

             "Unexpected register handling for calling convention.");
             "Custom register handling only expected for VarArg.");

        RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
      else if (Arg.getValueType().getFixedSizeInBits() <
        RegsToPass.push_back(std::make_pair(
             "Unexpected custom register for argument!");
        RegsToPass.push_back(std::make_pair(
      RegsToPass.push_back(std::make_pair(

  if (!MemOpChains.empty())

  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");

    const unsigned TOCSaveOffset =

  for (auto Reg : RegsToPass) {

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
  return CCInfo.CheckReturn(

  CCInfo.AnalyzeReturn(Outs,

  for (unsigned i = 0, RealResIdx = 0;
       i != RVLocs.size();
       ++i, ++RealResIdx) {

  RetOps.push_back(Flag);

PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
  EVT IntVT = Op.getValueType();
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[2] = {Chain, FPSIdx};

  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;

  bool isPPC64 = Subtarget.isPPC64();

PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  bool isPPC64 = Subtarget.isPPC64();

  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };

  bool isPPC64 = Subtarget.isPPC64();

                     Op.getOperand(0), Op.getOperand(1));

                     Op.getOperand(0), Op.getOperand(1));

  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

         "Custom lowering only for i1 loads");

  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

         "Custom lowering only for i1 stores");

         "Custom lowering only for i1 results");

  EVT TrgVT = Op.getValueType();
      !llvm::has_single_bit<uint32_t>(

  if (SrcSize == 256) {
  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  for (unsigned i = 0; i < TrgNumElts; ++i)
    ShuffV.push_back(i * SizeMult);
  for (unsigned i = 1; i <= TrgNumElts; ++i)
    ShuffV.push_back(i * SizeMult - 1);
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);

  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {

  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
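// FP-to-integer lowering: values are converted with fctiwz/fctidz-style
// nodes (or their strict variants) and then either moved directly on
// direct-move targets, or stored to a stack slot and reloaded, which is
// what LowerFP_TO_INTForReuse prepares via the ReuseLoadInfo structure.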
  bool IsStrict = Op->isStrictFPOpcode();

  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);

  assert(Src.getValueType().isFloatingPoint());
  if (Src.getValueType() == MVT::f32) {
    Chain = Src.getValue(1);

  switch (Op.getSimpleValueType().SimpleTy) {
      assert((IsSigned || Subtarget.hasFPCVT()) &&
             "i64 FP_TO_UINT is supported only with FPCVT");
                         {Chain, Src}, Flags);

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();

  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();

    Alignment = Align(4);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  if (Op.getValueType() == MVT::i32 && !i32Stack) {

  RLI.Alignment = Alignment;

                                                  const SDLoc &dl) const {
  if (Op->isStrictFPOpcode())

                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();

  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

    return Subtarget.hasP9Vector() ? Op : SDValue();

  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

                      {Op.getOperand(0), Lo, Hi}, Flags);
                      {Res.getValue(1), Res}, Flags);

    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};

                      {Chain, Src, FltOfs}, Flags);
                         {Chain, Val}, Flags);
                  dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
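// canReuseLoadAddress checks whether the operand of a conversion is itself
// a simple load whose address, chain, and memory metadata can be reused to
// re-load the value in a different register class; spliceIntoChain then
// rewires consumers of the original load's chain result onto the new
// token factor.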
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
  if (Op->isStrictFPOpcode())

      (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);

                             Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
  if (LD->getMemoryVT() != MemVT)

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
           "Non-pre-inc AM on PPC?");

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);

void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
  SDLoc dl(NewResChain);

         "A new TF really is required here");
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();

    if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)

    if (UI.getUse().get().getResNo() != 0)

  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();

  if (Op->isStrictFPOpcode()) {
    Chain = Op.getOperand(0);
  return DAG.getNode(ConvOpc, dl, ConvTy, Src);

                                                   const SDLoc &dl) const {
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;

  for (unsigned i = 1; i < NumConcat; ++i)

                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
         "Unexpected conversion type");
         "Supports conversions to v2f64/v4f32 only.");

  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  for (int i = 0; i < SaveElts; i++)
    ShuffV[i * Stride] = i;
  for (int i = 1; i <= SaveElts; i++)
    ShuffV[i * Stride - 1] = i - 1;

  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
  EVT ExtVT = Src.getValueType();
  if (Subtarget.hasP9Altivec())

                      {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
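// LowerINT_TO_FP: i64 and i32 sources are converted via fcfid-family nodes
// when FPCVT is available; otherwise the value is spilled to a stack slot
// and reloaded as a double (using lfiwax/lfiwzx where present) before the
// conversion, with canReuseLoadAddress/spliceIntoChain reusing an existing
// load of the source when possible.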
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  // ...
  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() /*...*/)
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal with ISA 3.0 support.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();
  // ...
  if (Src.getValueType() == MVT::i1) {
    // ... (select between 1.0 and 0.0, elided)
  }

  // If we have direct moves, we can do all the conversion and skip the
  // store/load; without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    // ... (without FPCVT, prepare the input to avoid double rounding when
    //      the final result is f32; elided)
    //       !Subtarget.hasFPCVT() && ...
    // ...
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      // ... (LFIWAX load, elided)
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      // ... (LFIWZX load, elided)
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() /* sign-extended i32 */) ||
                (Subtarget.hasFPCVT() /* zero-extended i32 */)) /*...*/) {
      // Store the extended i32 to a stack slot and reload it as FP.
      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");
      // ...
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      // ...
      Chain = Bits.getValue(1);
    }
    // ...
    if (IsStrict)
      Chain = FP.getValue(1);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      // Round the f64 result down to f32.
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      // ...
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // ...
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      // ... (store Src to a stack slot)
      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");
      // ...
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    // ... (LFIWAX for signed, LFIWZX for unsigned; elided)
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");
    // Sign-extend into a 64-bit register, store the whole doubleword, then
    // reload it as f64.
    SDValue Store = DAG.getStore(Chain, dl, Ext64, FIdx,
                                 /* ... pointer info, elided */);
    // ...
  }

  // FCFID it and return it.
  // ...
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                     DAG.getVTList(MVT::f32, MVT::Other),
                     {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    // ...
  }
  return FP;
}
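// Hedged sketch of the no-direct-move i32 shape above (registers are
// illustrative only):
//   stw    r3, -8(r1)     ; spill the integer to the stack slot
//   lfiwax f0, 0, rAddr   ; reload as a sign-extended word into an FPR
//   fcfid  f1, f0         ; convert to double
// Without FPCVT, an f32 result must additionally be produced by converting
// to f64 and rounding, which is why both paths finish with an FP_ROUND when
// Op.getValueType() == MVT::f32.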
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // ... (read the FPSCR with MFFS)
  Chain = MFFS.getValue(1);
  // ...
  assert(/*...*/ "Stack slot adjustment is valid only on big endian subtargets!");
  // ...
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // ...
  assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() &&
         /*...*/ "Unexpected SHL!");
  // ...
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // ...
  assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() &&
         /*...*/ "Unexpected SRL!");
  // ...
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // ...
  assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() &&
         /*...*/ "Unexpected SRA!");
  // ...
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
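// For reference, the standard expansion implemented by these *_PARTS
// lowerings for a 2*W-bit left shift by Amt (W = register width) is:
//   Lo' = Lo << Amt
//   Hi' = (Hi << Amt) | (Lo >> (W - Amt)) | (Lo << (Amt - W))
// where the two Lo terms cover the Amt < W and Amt >= W cases; the SRL and
// SRA variants are the mirror image.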
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // ...
  EVT AmtVT = Z.getValueType();
  // ...
}
// (getCanonicalConstSplat: build a canonical splat immediate of Val with an
// element size of SplatSize.)
  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };
  // ...
  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
    SplatSize = 1;
    Val = 0xFF;
  }
  EVT CanonicalVT = VTys[SplatSize-1];
  // ...
  for (unsigned i = 0; i != 16; ++i) {
    // ...
  }

// (haveEfficientBuildVectorPattern fragment: a build_vector is only worth
// expanding here if it is not simply a splatted load.)
  bool IsSplat = true;
  bool IsLoad = false;
  // ...
  return !(IsSplat && IsLoad);
// (convertToNonDenormSingle: if the value is representable as a
// single-precision float without losing information or becoming denormal,
// rewrite the argument in place and report success.)
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  // ... (convert to IEEEsingle, elided)
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;

// (checkConvertToNonDenormSingle: the non-mutating variant of the check.)
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  // ...
  return (!LosesInfo && !APFloatToConvert.isDenormal());
// (isValidSplatLoad helper)
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
  // ...
  EVT Ty = Op->getValueType(0);
  // ...

// (LowerBUILD_VECTOR)
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());
  // ...
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs()) {
    // ... (XXSPLTIDP for 64-bit splats, elided)
  }
  // ...
  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    // ...
    const SDValue *InputLoad = &Op.getOperand(0);
    // ...
    unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
    unsigned ElementSize = /*...*/;
    assert(((ElementSize == 2 * MemorySize) /*...*/) &&
           "Unmatched element size and opcode!\n");

    // The number of uses of the input load is 128 bits divided by the size
    // of each loaded element.
    unsigned NumUsesOfInputLD = 128 / ElementSize;
    for (SDValue BVInOp : Op->ops())
      if (BVInOp.isUndef())
        NumUsesOfInputLD--;
    // ...
    if (NumUsesOfInputLD == 1 &&
        /* ... (a single splatting use is enough with LFIWZX/...) */
        Subtarget.hasLFIWAX())
      return SDValue();
    // ...
    if (/*...*/ Subtarget.isISA3_1() && ElementSize <= 16)
      return SDValue();

    assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
    if (/*...*/ Subtarget.hasVSX()) {
      // ...
      SDValue LdSplt = DAG.getMemIntrinsicNode(
          /*...*/, Ops, LD->getMemoryVT(), LD->getMemOperand());
      // ...
    }
    // ...
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        /*...*/ Subtarget.hasP8Vector())
      return Op;
    return SDValue();
  }

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // ...
  }
  // ...
  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
    // ...

  if (Subtarget.hasP9Vector() && SplatSize == 1)
    // ...

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32 - SplatBitSize)) >>
                     (32 - SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);
  // ...
  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // ... (elided)
  if (SextVal >= -32 && SextVal <= 31) {
    // ...
    if (VT == Op.getValueType())
      return RetVal;
    // ...
  }
  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // ...
  }

  // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
  // ... (elided)

  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formed by multiple shift amounts).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      // ...
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      // ...
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      // ...
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      // ...
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      // ...
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      // ...
    }
  }
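// Worked example for the "vsplti + shl self" rule above (illustrative): to
// splat 0xF8000000 into v4i32, take i = -8 from SplatCsts. Then
// TypeShiftAmt = (-8) & 31 = 24 and (unsigned)-8 << 24 == 0xF8000000, so the
// sequence is vspltisw v2, -8 followed by vslw v2, v2, v2: each word shifts
// itself left by its own low-order five bits (24).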
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table,
/// emit the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  // ...
  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3)
      return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }
  // ...
  int ShufIdxs[16];
  switch (OpNum) {
  default:
    llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  // ... (OP_VSLDOI cases, elided)
  }
  // ...
}
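// The perfect-shuffle table entry layout decoded above, as a self-contained
// sketch (the entry value below is made up for illustration):
//   #include <cstdint>
//   #include <cstdio>
//   int main() {
//     uint32_t PFEntry = (1u << 30) | (2u << 26) |
//                        (((((1 * 9 + 2) * 9 + 3) * 9 + 4)) << 13) | 0;
//     unsigned Cost  =  PFEntry >> 30;            // bits 30..31
//     unsigned OpNum = (PFEntry >> 26) & 0x0F;    // bits 26..29
//     unsigned LHSID = (PFEntry >> 13) & 0x1FFF;  // bits 13..25
//     unsigned RHSID =  PFEntry        & 0x1FFF;  // bits  0..12
//     std::printf("%u %u %u %u\n", Cost, OpNum, LHSID, RHSID);
//   }
// Each 13-bit ID packs four base-9 digits (element indices 0..7, with 9
// values per digit so 8 can mean "undef"), which is why the OP_COPY check
// compares against (1*9+2)*9+3 (elements 0,1,2,3) and ((4*9+5)*9+6)*9+7.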
/// lowerToVINSERTB - Lower a shuffle to a single VINSERTB where possible.
static SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
  const unsigned BytesInVector = 16;
  // ...
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  // ...
  bool FoundCandidate = false;
  // VINSERTB always inserts into byte 7 (BE) / 8 (LE) of the first operand.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find a candidate.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If the second operand is undefined, we should only look for element 7
    // in the mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements to see if they are in original order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // ...
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }

    if (OtherElementsInOrder) {
      // ...
      ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                       : BigEndianShifts[CurrentElement & 0xF];
      Swap = CurrentElement < BytesInVector;
      // ...
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();
  // ...
}
/// lowerToVINSERTH - Lower a shuffle to a single VINSERTH where possible.
static SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // ...
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Pack the eight half-word indices into a 32-bit value, one nibble per
  // half-word.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    // ...
  }

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    // ...
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    // ...
    if (V2.isUndef()) {
      // VINSERTH always inserts from half-word 3 (BE) / 4 (LE).
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      // ...
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else {
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // ...
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // ...
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();
  // ...
}
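// The 32-bit Mask built above packs all eight half-word indices as nibbles
// (4 bits each), so OriginalOrderLow == 0x01234567 simply means "half-words
// 0..7 of the first source in order" and OriginalOrderHigh == 0x89ABCDEF is
// the same for the second source. MaskOtherElts clears one nibble so a
// single out-of-place half-word can be compared against an otherwise
// unpermuted vector -- exactly the pattern one vinserth can produce.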
// (lowerToXXSPLTI32DX)
  auto ShuffleMask = SVN->getMask();
  // ...
  ShuffleMask = CommutedSV->getMask();
  // ...
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // ...
  // Check that the shuffle selects words in the required pattern for
  // XXSPLTI32DX.
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15)) {
    // ...
  } else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
             (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
              ShuffleMask[0] > 15 && ShuffleMask[8] > 15)) {
    // ...
  } else
    return SDValue();

  // Widen the splat value to 32 bits.
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);
  // ...

// (LowerROTL)
  assert(/*...*/ "Only set v1i128 as custom, other type shouldn't reach here!");
  // ...
  // A rotate by a multiple of 8 bits is just a byte rotation.
  if (SHLAmt % 8 == 0) {
    std::array<int, 16> Mask;
    std::iota(Mask.begin(), Mask.end(), 0);
    // ...
  }
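// Rotating a v1i128 left by a multiple of 8 bits is just a byte rotation, so
// the code above materializes the identity byte mask with std::iota and then
// rotates it: e.g. a 16-bit rotate becomes (roughly, in big-endian byte
// order) the shuffle <2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1>, which a single
// vsldoi-style shuffle implements.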
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  // ...
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = /* ... normal-load input of V1, elided */;
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      /* ... splat of a single element, elided */) {
    // ...
    // The splat index for permuted loads will be in the left half of the
    // vector, so adjust it.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      unsigned Offset;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If we are loading a partial vector, it does not make sense to adjust
      // the base pointer. This happens with (splat (s_to_v_permuted (ld))).
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;
      // ...
      SDValue LdSplt = DAG.getMemIntrinsicNode(
          /*...*/, Ops, LD->getMemoryVT(), LD->getMemOperand());
      // ...
    }
  }

  if (Subtarget.hasP9Vector() &&
      /* ... XXINSERTW mask match, elided */) {
    // ...
  }

  if (Subtarget.hasPrefixInstrs()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;
    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      /* ... XXSLDWI mask match, elided */) {
    // ...
  }

  if (Subtarget.hasVSX() &&
      /* ... XXPERMDI mask match, elided */) {
    // ...
  }

  if (Subtarget.hasP9Vector()) {
    // ... (byte-reverse patterns, elided)
  }

  if (Subtarget.hasVSX()) {
    // ... (splat and swap lowerings, elided)
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    // ... (single-input checks, some guarded by
    //      Subtarget.hasP8Altivec(), elided)
  }

  // Altivec has a variety of "shuffle immediates" that take two vector
  // inputs.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  // ... (two-input checks, some guarded by Subtarget.hasP8Altivec(), elided)

  // Check to see if this is a shuffle of 4-byte values. If so, we can use
  // our perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8; // Start out undef.
    for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
      if (PermMask[i * 4 + j] < 0)
        continue; // Undef, ignore it.

      unsigned ByteSource = PermMask[i * 4 + j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource / 4;
      } else if (EltNo != ByteSource / 4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // ...
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                            PFIndexes[2] * 9 + PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);
    // ...
  }
  // ...
  if (V2.isUndef())
    V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
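// The cost lookup above: each PFIndexes[i] is in [0,8] (8 meaning "any"), so
// the four of them form a base-9 number indexing PerfectShuffleTable. The
// top two bits of the entry are its cost in instructions; a cost above the
// single-vperm threshold simply falls through to LowerVPERM below.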
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  // ...
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();
  // ...
  if (isLittleEndian)
    // ... (swap inputs and complement the mask, elided)

  if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;
    // ...
    NeedSwap = !NeedSwap;
  }
  // ...
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    // ...
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
      else
        ResultMask.push_back(
            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
  }

  if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (V1HasXXSWAPD) {
      dl = SDLoc(V1->getOperand(0));
      V1 = V1->getOperand(0)->getOperand(1);
    }
    if (V2HasXXSWAPD) {
      dl = SDLoc(V2->getOperand(0));
      V2 = V2->getOperand(0)->getOperand(1);
    }
    // ...
  }

  ShufflesHandledWithVPERM++;
  // ...
  LLVM_DEBUG({
    if (Opcode == PPCISD::XXPERM)
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    else
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    // ...
    dbgs() << "With the following permute control vector:\n";
    // ...
  });
  // ...
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison; otherwise fill in the compare opcode and whether this
/// is a "dot" (record, CR6-setting) form.
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  // ...
  switch (IntrinsicID) {
  // The "_p" predicate forms set isDot and write CR6; the per-case
  // CompareOpc/isDot assignments are elided in this excerpt.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
  case Intrinsic::ppc_altivec_vcmpequb_p:
  case Intrinsic::ppc_altivec_vcmpequh_p:
  case Intrinsic::ppc_altivec_vcmpequw_p:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      // ...
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        // ...
      case Intrinsic::ppc_altivec_vcmpneh_p:
        // ...
      case Intrinsic::ppc_altivec_vcmpnew_p:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        // ...
      }
      // ...
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      // ...
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      // ...
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      // ...
    case Intrinsic::ppc_altivec_vcmpgtsq:
      // ...
    case Intrinsic::ppc_altivec_vcmpgtuq:
      // ...
    }
    break;
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        // ...
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        // ...
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        // ...
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        // ...
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        // ...
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        // ...
      }
      // ...
    } else
      return false;
    break;

  // Normal (non-dot) comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
  case Intrinsic::ppc_altivec_vcmpeqfp:
  case Intrinsic::ppc_altivec_vcmpequb:
  case Intrinsic::ppc_altivec_vcmpequh:
  case Intrinsic::ppc_altivec_vcmpequw:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      // ...
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        // ...
      case Intrinsic::ppc_altivec_vcmpneh:
        // ...
      case Intrinsic::ppc_altivec_vcmpnew:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezb:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezh:
        // ...
      case Intrinsic::ppc_altivec_vcmpnezw:
        // ...
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
  case Intrinsic::ppc_altivec_vcmpgtfp:
  case Intrinsic::ppc_altivec_vcmpgtsb:
  case Intrinsic::ppc_altivec_vcmpgtsh:
  case Intrinsic::ppc_altivec_vcmpgtsw:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      // ...
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
  case Intrinsic::ppc_altivec_vcmpgtuh:
  case Intrinsic::ppc_altivec_vcmpgtuw:
    // ...
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      // ...
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      // ...
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      // ...
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      // ...
    }
    break;
  }
  // ...
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, either for PPC32 or PPC64.
    // ...
  case Intrinsic::ppc_mma_disassemble_acc: {
    if (Subtarget.isISAFuture()) {
      // ... (extract the four vectors, pushing each result:)
      RetOps.push_back(Extract);
      // ... (repeated for all four subvectors)
      // ...
    }
    [[fallthrough]];
  }
  case Intrinsic::ppc_vsx_disassemble_pair: {
    int NumVecs = 2;
    SDValue WideVec = Op.getOperand(1);
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      NumVecs = 4;
      // ...
    }
    // ...
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      // ...
      RetOps.push_back(Extract);
    }
    // ...
  }
  case Intrinsic::ppc_unpack_longdouble: {
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    // ... (select the requested half; note the flipped index:)
    //        Idx->getValueType(0)));
  }
  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    unsigned Pred;
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
      break;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
      break;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
      break;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
      break;
    }
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
                                        Op.getOperand(1), Op.getOperand(2)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_test_data_class: {
    EVT OpVT = Op.getOperand(1).getValueType();
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
                                                             : PPC::XSTSTDCSP);
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
                                        Op.getOperand(1)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_fnmsub: {
    EVT VT = Op.getOperand(1).getValueType();
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      // ... (expand as fneg(fma(a, b, fneg(c))), elided)
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
                    dl, SDValue());
    return Result.first;
  }
  case Intrinsic::ppc_maxfe:
  case Intrinsic::ppc_maxfl:
  case Intrinsic::ppc_maxfs:
  case Intrinsic::ppc_minfe:
  case Intrinsic::ppc_minfl:
  case Intrinsic::ppc_minfs: {
    EVT VT = Op.getValueType();
    assert(
        all_of(Op->ops().drop_front(4),
               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
    // (x < y) ? x : y
    ISD::CondCode CC = ISD::SETGT;
    if (IntrinsicID == Intrinsic::ppc_minfe ||
        IntrinsicID == Intrinsic::ppc_minfl ||
        IntrinsicID == Intrinsic::ppc_minfs)
      CC = ISD::SETLT;
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
    SDValue Res = Op.getOperand(I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
      Res =
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
    }
    return Res;
  }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode and the two operands are:
  //   Op.getOperand(1), Op.getOperand(2),
  // ... (build the VCMP node, elided)

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // ...
  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default: // Can't happen, don't crash on invalid number though.
  case 0:  // Return the value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = false;
    break;
  case 1: // Return the inverted value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = true;
    break;
  case 2: // Return the value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = false;
    break;
  case 3: // Return the inverted value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = true;
    break;
  }
  // ...

// (LowerINTRINSIC_VOID)
  SDLoc dl(Op);
  // The first operand can be a chain; the start of the actual intrinsic
  // arguments shifts accordingly.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    SDValue Val = Op.getOperand(ArgStart + 1);
    // ...
  }
  // ...
  }
// (LowerBSWAP fragment)
  int VectorIndex = 0;
  // ...

SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  // ...
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  // ...
}

SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  SDLoc dl(N);
  unsigned Opc = N->getOpcode();
  switch (Opc) {
  case ISD::ATOMIC_LOAD: {
    // ...
    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
      Ops.push_back(N->getOperand(I));
    SDValue LoadedVal =
        DAG.getMemIntrinsicNode(/*...*/, Ops, MemVT, N->getMemOperand());
    // ...
  }
  case ISD::ATOMIC_STORE: {
    // ...
    Ops.push_back(ValLo);
    Ops.push_back(ValHi);
    Ops.push_back(N->getOperand(1));
    return DAG.getMemIntrinsicNode(/*...*/, Ops, MemVT, N->getMemOperand());
  }
  // ...
  }
}
10978 "Should only be called for ISD::INSERT_VECTOR_ELT");
10982 EVT VT =
Op.getValueType();
10990 if (Subtarget.hasP9Vector()) {
11000 (isa<LoadSDNode>(
V2))) {
11005 BitcastLoad,
Op.getOperand(2));
11010 if (Subtarget.isISA3_1()) {
11029 unsigned InsertAtElement =
C->getZExtValue();
11030 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11032 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
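// Byte positions for the vins* forms: InsertAtByte counts from the left in
// big-endian element order, so on little-endian targets the line above
// mirrors it. E.g. inserting element 1 of a v4i32 means byte 4 on BE but
// (16 - 4) - 4 == 8 on LE.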
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  // ...
  EVT VT = Op.getValueType();
  // ...
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  // ...
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // ... (load one 16-byte subvector and record it:)
    Loads.push_back(Load);
    LoadChains.push_back(Load.getValue(1));
  }
  // ...
}

SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  // ...
  EVT StoreVT = Value.getValueType();
  // ...
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  // ...
  unsigned NumVecs = 2;
  if (Subtarget.isISAFuture()) {
    NumVecs = 4;
    // ...
    SDNode *ExtNode = DAG.getMachineNode(
        PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
    Value = SDValue(ExtNode, 0);
    Value2 = SDValue(ExtNode, 1);
  }

  // ...
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    // ...
    if (Subtarget.isISAFuture()) {
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      // ... (extract from Idx > 1 ? Value2 : Value, elided)
    }
    // ...
    SDValue Store =
        DAG.getStore(StoreChain, dl, Elt, BasePtr, /*...*/);
    Stores.push_back(Store);
  }
  // ...
}
    // Merge the results together. Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and ensure we elide the correct bytes on
    // little-endian targets.
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
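// Context for the interleave above (a summary, not additional behavior):
// vmuleub/vmuloub multiply the even/odd bytes into sixteen 16-bit products
// split across two vectors. Taking the low byte of each product and
// re-interleaving them -- the 2*i / 2*i+16 index pattern, offset by one on
// big-endian where the low byte sits second -- reconstructs the sixteen
// one-byte products of a v16i8 multiply.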
// (LowerFP_ROUND)
  bool IsStrict = Op->isStrictFPOpcode();
  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
      !Subtarget.hasP9Vector())
    return SDValue();
  // ...

// (LowerFP_EXTEND)
  assert(/*...*/ "Should only be called for ISD::FP_EXTEND");
  // ...
  assert(/*...*/ "Node should have 2 operands with second one being a constant!");
  // ...
  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
  // ...
  int DWord = Idx >> 1;
  // ...
    return DAG.getMemIntrinsicNode(/*...*/, LD->getMemoryVT(),
                                   LD->getMemOperand());
  // ...
    return DAG.getMemIntrinsicNode(/*...*/, LD->getMemoryVT(),
                                   LD->getMemOperand());
  // ...
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  // ...
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  // ...
  case ISD::FSHL:
    return LowerFunnelShift(Op, DAG);
  case ISD::FSHR:
    return LowerFunnelShift(Op, DAG);
  // ...
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  // ...
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  // ...
  }
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;
    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    // ...
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
    case Intrinsic::ppc_pack_longdouble:
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  // ...
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    EVT VT = N->getValueType(0);
    // ...
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128) {
      // ...
    }
    // ...
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    break;
  }
  // ...
    if (!N->getValueType(0).isVector())
      return;
  // ...
  }
}
// (callIntrinsic helper used by the fence emitters)
  // ...
  return Builder.CreateCall(Func, {});

// (emitTrailingFence) An acquire load on PPC64 is strengthened via the
// ppc_cfence intrinsic, which selection turns into the control-dependency
// plus isync pattern:
  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
    return Builder.CreateCall(
        Intrinsic::getDeclaration(
            Builder.GetInsertBlock()->getParent()->getParent(),
            Intrinsic::ppc_cfence, {Inst->getType()}),
        {Inst});
  // ...
MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
                                                       MachineBasicBlock *BB,
                                                       unsigned AtomicSize,
                                                       unsigned BinOpcode,
                                                       unsigned CmpOpcode,
                                                       unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode == 0.
  // ...
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }
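  // The lowered loop has the classic load-reserve / store-conditional shape,
  // e.g. for a 32-bit atomic add (register names illustrative only):
  //   loop:
  //     lwarx  r5, 0, r3    ; load with reservation
  //     add    r5, r5, r4   ; BinOpcode
  //     stwcx. r5, 0, r3    ; store iff the reservation is still held
  //     bne-   loop         ; lost the reservation -> retry
  // LBARX/LHARX and STBCX./STHCX. require the partword-atomics feature,
  // hence the asserts above.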
  // ...
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  // ...
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // ...
  Register TmpReg = (!BinOpcode) ? incr :
      RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
                                                    : &PPC::GPRCRegClass);
  // ...
  BB->addSuccessor(loopMBB);
  // ...
  // For min/max, partword values must be sign-extended before the signed
  // compare.
  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
    // ...
    BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
            /*...*/);
    // ...
  }
  // ...
  BB->addSuccessor(loop2MBB);
  BB->addSuccessor(exitMBB);
  // ...
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);
  // ...
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
  switch (MI.getOpcode()) {
  case PPC::COPY:
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());
  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
  case PPC::EXTSB8_rec:
  case PPC::EXTSB_rec:
  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64:
  case PPC::EXTSH8_rec:
  case PPC::EXTSH_rec:
  case PPC::EXTSW:
  case PPC::EXTSWSLI:
  case PPC::EXTSWSLI_32_64:
  case PPC::EXTSWSLI_32_64_rec:
  case PPC::EXTSWSLI_rec:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
  case PPC::EXTSW_32_64_rec:
  case PPC::EXTSW_rec:
  case PPC::SRAW:
  case PPC::SRAWI:
  case PPC::SRAWI_rec:
  case PPC::SRAW_rec:
    return true;
  }
  return false;
}
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, unsigned BinOpcode,
    unsigned CmpOpcode, unsigned CmpPred) const {
  // ...
  bool IsSignExtended = /* ... (helper above) */;

  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
    Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
        .addReg(MI.getOperand(3).getReg());
    MI.getOperand(3).setReg(ValueReg);
    // ...
  }
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);
  // ...
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
  // ...
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  // ...
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // ...
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  // ...
  BB->addSuccessor(loopMBB);
  // ...
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  }
  // Compute the in-word shift for the requested byte or half-word. We use
  // the 32-bit subregister to avoid a register-class mismatch in 64-bit mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  // ...
  unsigned ValueReg = SReg;
  unsigned CmpReg = Incr2Reg;
  if (CmpOpcode == PPC::CMPW) {
    // ...
    BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
        .addReg(ValueReg);
    ValueReg = ValueSReg;
    // ...
  }
  // ...
  BB->addSuccessor(loop2MBB);
  BB->addSuccessor(exitMBB);
  // ...
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);
  // ...
  // Shift the result back into position:
  //   .addImm(is8bit ? 24 : 16)
  // ... (elided)
// (emitEHSjLjSetJmp)
  Register DstReg = MI.getOperand(0).getReg();
  // ...
  assert(/*...*/ "Invalid Pointer Size!");
  // ...
  Register BufReg = MI.getOperand(1).getReg();
  // ...
  if (/*...*/)
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
  // ... (store the base pointer into the buffer, elided)
  // ...
  BuildMI(mainMBB, DL,
          TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
  // ...
  MI.eraseFromParent();
  return sinkMBB;
}

// (emitEHSjLjLongJmp)
  // ...
  assert(/*...*/ "Invalid Pointer Size!");
  // ...
  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  // ...
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  // ...
  Register BufReg = MI.getOperand(0).getReg();
  // ...
  MI.eraseFromParent();
  return MBB;
}
12180 "Unexpected stack alignment");
12184 unsigned StackProbeSize =
12188 return StackProbeSize ? StackProbeSize :
StackAlign;
12200 const bool isPPC64 = Subtarget.
isPPC64();
12232 MF->
insert(MBBIter, TestMBB);
12233 MF->
insert(MBBIter, BlockMBB);
12234 MF->
insert(MBBIter, TailMBB);
12239 Register DstReg =
MI.getOperand(0).getReg();
12240 Register NegSizeReg =
MI.getOperand(1).getReg();
12241 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12252 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12258 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12259 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12261 .
addDef(ActualNegSizeReg)
12263 .
add(
MI.getOperand(2))
12264 .
add(
MI.getOperand(3));
12270 .
addReg(ActualNegSizeReg);
12273 int64_t NegProbeSize = -(int64_t)ProbeSize;
12274 assert(isInt<32>(NegProbeSize) &&
"Unhandled probe size!");
12276 if (!isInt<16>(NegProbeSize)) {
12279 .
addImm(NegProbeSize >> 16);
12283 .
addImm(NegProbeSize & 0xFFFF);
12292 .
addReg(ActualNegSizeReg)
12301 .
addReg(ActualNegSizeReg);
12311 BuildMI(TestMBB,
DL,
TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12325 BuildMI(BlockMBB,
DL,
TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12339 MaxCallFrameSizeReg)
12340 .
add(
MI.getOperand(2))
12341 .
add(
MI.getOperand(3));
12342 BuildMI(TailMBB,
DL,
TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12344 .
addReg(MaxCallFrameSizeReg);
12353 MI.eraseFromParent();
12355 ++NumDynamicAllocaProbed;
  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }
  // ...
  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
      MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
      MI.getOpcode() == PPC::SELECT_I8) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      // ...
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_F16 ||
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
             MI.getOpcode() == PPC::SELECT_CC_SPE ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_F16 ||
             MI.getOpcode() == PPC::SELECT_SPE ||
             MI.getOpcode() == PPC::SELECT_SPE4 ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
             MI.getOpcode() == PPC::SELECT_VSRC) {
    // ...
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // ...
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
        MI.getOpcode() == PPC::SELECT_F16 ||
        MI.getOpcode() == PPC::SELECT_SPE4 ||
        MI.getOpcode() == PPC::SELECT_SPE ||
        MI.getOpcode() == PPC::SELECT_VRRC ||
        MI.getOpcode() == PPC::SELECT_VSFRC ||
        MI.getOpcode() == PPC::SELECT_VSSRC ||
        MI.getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }
    // ...
    BB->addSuccessor(sinkMBB);
    // ...
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI),
            MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // ...
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);
    // ...
    BB->addSuccessor(readMBB);
    // ... (read TBU/TBL, compare the upper halves, retry on mismatch)
    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  // The min/max/umin/umax forms pass no binary opcode; instead they use a
  // compare (CMPW/CMPD signed, CMPLW/CMPLD unsigned) plus the predicate that
  // lets the loop keep the old value (predicate constants elided here).
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, /*...*/);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, /*...*/);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, /*...*/);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, /*...*/);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, /*...*/);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    // ...
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    // ...
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    // ...
    BB->addSuccessor(loop1MBB);
    // ...
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
        .addReg(dest)
        .addReg(oldval);
    // ...
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    // ...
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);
    // ...
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them. Other registers can
    // be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool isLittleEndian = Subtarget.isLittleEndian();
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
    // ...
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    // ...
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    // ...
    const TargetRegisterClass *RC =
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
    // ...
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    // ...
    BB->addSuccessor(loop1MBB);
    // ...
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA)
          .addReg(ptrB);
    }
    // ...
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
        .addImm(3)
        .addImm(27)
        .addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
          .addReg(Shift1Reg)
          .addImm(is8bit ? 24 : 16);
    // ...
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    // ...
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);
    // ...
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // FADDrtz: add while temporarily forcing round-to-zero mode (elided).
    // ...
  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
    unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
                          ? PPC::ANDI8_rec
                          : PPC::ANDI_rec;
    bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
                 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
    // ...
    Register Dest = RegInfo.createVirtualRegister(
        Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
    // ...
    BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
    // ...
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
    // ... (copy the TM CR field, elided)
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
    // ...
    unsigned Imm = MI.getOperand(1).getImm();
    // ...
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
    // ... (copy CR0 EQ, elided)
  } else if (MI.getOpcode() == PPC::SETRNDi) {
    // ...
    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value; if the old value is never read, an IMPLICIT_DEF
    // suffices.
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    // ...
    // The rounding mode lives in the low two bits of FPSCR; set them with
    // MTFSB0/MTFSB1 (elided).
    unsigned Mode = MI.getOperand(1).getImm();
    // ...
  } else if (MI.getOpcode() == PPC::SETRND) {
    // ...
    // copyRegFromG8RCOrF8RC copies a 64-bit value between a G8RC and an F8RC
    // register, through memory when direct moves are unavailable.
    auto copyRegFromG8RCOrF8RC = [&](unsigned DestReg, unsigned SrcReg) {
      if (Subtarget.hasDirectMove()) {
        // ...
      } else {
        // Use a store/load pair through a stack slot.
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
        // ...
        if (RC == &PPC::F8RCRegClass) {
          // Copy register from F8RCRegClass to G8RCRegclass.
          assert(/*...*/ "Unsupported RegClass.");
          StoreOp = PPC::STFD;
          // ...
        } else {
          // Copy register from G8RCRegClass to F8RCRegclass.
          assert((RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
                 /*...*/ "Unsupported RegClass.");
        }
        // ...
      }
    };

    Register OldFPSCRReg = MI.getOperand(0).getReg();
    // ...
    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
    // ...
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
    // ...
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
    // ...
  } else if (MI.getOpcode() == PPC::SETFLM) {
    // ...
    // The result of setflm is the previous FPSCR content, so save it first.
    Register OldFPSCRReg = MI.getOperand(0).getReg();
    // ...
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    // ...
    Register NewFPSCRReg = MI.getOperand(1).getReg();
    // ... (MTFSF the new value, elided)
  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
             MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
    return emitProbedAlloca(MI, BB);
  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
    // ...
    BuildMI(/*...*/)
        .addUse(Src, 0, PPC::sub_gp8_x1);
    BuildMI(/*...*/)
        .addUse(Src, 0, PPC::sub_gp8_x0);
  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
             MI.getOpcode() == PPC::STQX_PSEUDO) {
    // ...
    Register Ptr =
        F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
    // ...
    BuildMI(*BB, MI, DL,
            MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
                                              : TII->get(PPC::STQ))
    // ... (operands elided)
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // We only need one refinement step when the target supports the
  // high-precision estimate instructions.
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  // ...
  return RefinementSteps;
}

SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                            const DenormalMode &Mode) const {
  // ...
  EVT VT = Op.getValueType();
  // ...
}

SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                               SelectionDAG &DAG) const {
  // ...
  EVT VT = Op.getValueType();
  // ...
}

SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      /* ... (v2f64 with VSX, elided) */) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    // The Newton-Raphson computation with a single constant does not provide
    // enough accuracy on some CPUs.
    UseOneConstNR = !Subtarget.needsTwoConstNR();
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      /* ... (v2f64 with VSX, elided) */) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // ...
}
// isConsecutiveLSLoc - Return true if Loc is exactly Dist * Bytes away from
// BaseLoc.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist, SelectionDAG &DAG) {
  // ...
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // ...
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes)
      return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist * Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  // ... (strip constant offsets from both bases)
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;
  // ... (compare global addresses plus offsets)
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}

// Like isConsecutiveLSLoc, but works on a memory-accessing SDNode.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes,
                            int Dist, SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    // ...
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default:
      return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }
    // ...
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default:
      return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }
    // ...
  }

  return false;
}
// Return true if there is a consecutive load adjacent to LD, chaining
// through token factors.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  EVT VT = LD->getMemoryVT();
  // ...
  // First, search up the chain, branching out through token factors, looking
  // for candidate loads; record the chain roots we encounter.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search back down the chain, starting from the roots found above.
  Visited.clear();
  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode *U : LoadRoot->uses())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}
// (generateEquivalentSub tail)
  auto Final = Shifted;
  // ...

SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  // ...
  // This combine is performed after type legalization.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // All users of the SETCC must want its result extended as-is.
  for (const SDNode *U : N->uses())
    if (/* ... not an extension, elided */)
      return SDValue();
  // ...
  auto OpSize = N->getOperand(0).getValueSizeInBits();
  // ...
  if (OpSize < Size) {
    // ...
  }
  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have these
  // trunc/ext operations interfering with the sign/zero bits. (Elided.)
  // ...
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();
  // ...
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(/*...*/))->get();
  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
  // ...
    return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                         : SDValue());
  // ...
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      /* ... (SELECT/SELECT_CC/TRUNCATE, elided) */
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      /* ... */)
    return SDValue();
  // ...
  // Gather the i1-truncation/constant inputs and the binary operations
  // feeding them.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::TRUNCATE &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
         isa<ConstantSDNode>(N->getOperand(i))))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));
    // ...
  }

  // Visit all inputs, collecting the binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    // ...
    PromOps.push_back(BinOp);
    // ...
  }

  // Make sure that this is a self-contained cluster of operations (which is
  // not quite the same thing as saying that everything has only one use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    // ...
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    // ...
  }
  // ...
  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    // ...
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a variable
  // number of operands).
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();
    // ...
    if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
        /* ... operand not yet promoted */) {
      // Defer this node until its operand has been visited.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }
    // ...
    if (isa<ConstantSDNode>(RepValue))
      // ...

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:
      C = 0;
      break;
    // ... (SELECT: C = 1; SELECT_CC: C = 2; elided)
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         /* ... */))
      // ...
      PromOpHandles.emplace_front(PromOp);
      // ...

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        // ...
    // ...
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);
  // ...
}
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  // ...
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        /* ... */))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      /* ... (SELECT/SELECT_CC, elided) */)
    return SDValue();
  // ...
  // Visit all inputs, collecting the binary operations that are fed by
  // truncations.
  while (!BinOps.empty()) {
    // ...
    PromOps.push_back(BinOp);
    // ...
  }

  // Make sure that this is a self-contained cluster of operations.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    // ...
    // For SELECT and SELECT_CC, record which operand position feeds the
    // user so the truncation can be reinserted there later.
    SelectTruncOp[0].insert(std::make_pair(User, /*...*/));
    // ...
    SelectTruncOp[0].insert(std::make_pair(User, /*...*/));
    SelectTruncOp[1].insert(std::make_pair(User, /*...*/));
    // ...
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    // ...
    SelectTruncOp[0].insert(std::make_pair(User, /*...*/));
    // ...
    SelectTruncOp[0].insert(std::make_pair(User, /*...*/));
    SelectTruncOp[1].insert(std::make_pair(User, /*...*/));
    // ...
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  // If all of the inputs are known to already have the high bits in the
  // right state, the extension is free.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits();
    assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

    if (/* ... zero-extend: high bits not known zero */
        /* (OpBits-PromBits high bits) */
        /* ... sign-extend: fewer than (OpBits-(PromBits-1)) sign bits */) {
      ReallyNeedsExt = true;
      break;
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced addressing their users.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    // ...
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a variable
  // number of operands).
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:
      C = 0;
      break;
    // ... (SELECT: C = 1; SELECT_CC: C = 2; elided)
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         /* ... operands not yet promoted */)) {
      // Defer until the to-be-promoted operands have been visited.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (/* ... */
        (SelectTruncOp[1].count(PromOp.getNode()) &&
         /* ... */)) {
      PromOpHandles.emplace_front(PromOp);
      continue;
    }
    // ...
    // If this node has constant inputs, then they'll need to be promoted
    // here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      // ...
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
    if (SI0 != SelectTruncOp[0].end())
      // ...
    auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
    if (SI1 != SelectTruncOp[1].end())
      // ...
    // ...
  }

  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero-extend, just mask off everything except the (known-zero) high
  // bits.
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), /*...*/,
                       DAG.getConstant(APInt::getLowBitsSet(
                                           N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type");
  // Sign-extend with a shift pair.
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  // ...
}
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");
  // ...
  EVT VT = N->getValueType(0);
  EVT OpVT = LHS.getValueType();
  // ...
  return DAGCombineTruncBoolExt(N, DCI);
}
SDValue PPCTargetLowering::combineElementTruncationToVectorTruncation(
    SDNode *N, DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");
  // ...
  SDValue FirstInput = N->getOperand(0);
  assert(/*...*/ "The input operand must be an fp-to-int conversion.");
  // ...
  bool IsSplat = true;
  // ...
  EVT TargetVT = N->getValueType(0);
  // All the inputs must be the same kind of fp-to-int conversion of the
  // same source value for a splat.
  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
    // ...
    if (NextConversion != FirstConversion)
      return SDValue();
    // ...
    if (N->getOperand(i) != FirstInput)
      IsSplat = false;
  }
  // ...
  // Now that we know we have the right type of node, build it with the
  // truncated (or undef) inputs.
  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
    // ...
      Ops.push_back(DAG.getUNDEF(SrcVT));
    // ... (or the f64->f32 truncation of the source:)
      Ops.push_back(Trunc);
    // ... (or, when no truncation is needed, the source itself:)
      Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
  }
  // ...
  return DAG.getNode(Opcode, dl, TargetVT, BV);
}
14239 "Should be called with a BUILD_VECTOR node");
14244 if (!
N->getValueType(0).getVectorElementType().isByteSized())
14247 bool InputsAreConsecutiveLoads =
true;
14248 bool InputsAreReverseConsecutive =
true;
14249 unsigned ElemSize =
N->getValueType(0).getScalarType().getStoreSize();
14250 SDValue FirstInput =
N->getOperand(0);
14251 bool IsRoundOfExtLoad =
false;
14256 FirstLoad = cast<LoadSDNode>(FirstInput.
getOperand(0));
14261 N->getNumOperands() == 1)
14264 if (!IsRoundOfExtLoad)
14265 FirstLoad = cast<LoadSDNode>(FirstInput);
14268 InputLoads.push_back(FirstLoad);
14269 for (
int i = 1,
e =
N->getNumOperands();
i <
e; ++
i) {
14271 if (IsRoundOfExtLoad &&
N->getOperand(
i).getOpcode() !=
ISD::FP_ROUND)
14274 SDValue NextInput = IsRoundOfExtLoad ?
N->getOperand(
i).getOperand(0) :
14280 IsRoundOfExtLoad ?
N->getOperand(
i-1).getOperand(0) :
N->getOperand(
i-1);
14281 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14282 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14291 InputsAreConsecutiveLoads =
false;
14293 InputsAreReverseConsecutive =
false;
14296 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14298 InputLoads.push_back(LD2);
14301 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14302 "The loads cannot be both consecutive and reverse consecutive.");
14306 if (InputsAreConsecutiveLoads) {
14307 assert(FirstLoad &&
"Input needs to be a LoadSDNode.");
14311 ReturnSDVal = WideLoad;
14312 }
else if (InputsAreReverseConsecutive) {
14314 assert(LastLoad &&
"Input needs to be a LoadSDNode.");
14319 for (
int i =
N->getNumOperands() - 1;
i >= 0;
i--)
14323 DAG.
getUNDEF(
N->getValueType(0)), Ops);
14327 for (
auto *
LD : InputLoads)
14329 return ReturnSDVal;
// (addShuffleForVecExtend) Reposition the extracted elements so they land in
// the lanes the vector-extend instruction expects, then sign-extend in-reg.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT VT = N->getValueType(0);
  // ...
  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               Input.getValueType().getVectorElementType(),
                               VT.getVectorNumElements());
  // ...

// (combineBVOfVecSExt) Match a BUILD_VECTOR of sign-extended extracts from a
// single input vector.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    // ...
    if (Input && Input != Extract.getOperand(0))
      return false;
    // ...
    Elems = Elems << 8;
    // ...
  };

  // If the build vector operands aren't sign-extended vector extracts of the
  // same input vector, bail out.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
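// TargetElems/CorrectElems above pack one expected vector-extract index per
// byte, with the little-endian index in the low nibble and the big-endian
// index in the high nibble. Masking with 0x0F0F0F0F0F0F0F0F keeps the LE
// half of each pattern, 0xF0F0F0F0F0F0F0F0 the BE half; if the indices
// gathered from the build_vector match, the whole thing can become a single
// vector sign-extend node.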
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();

// (helper combining a build_vector fed by a single widening load)
  SDValue Operand = N->getOperand(0);
  // ...
  auto *LD = cast<LoadSDNode>(Operand);
  // ...
  if (!ValidLDType ||
      /* ... extension-type checks, elided */)
    return SDValue();
  // ...
  SDValue Ops[] = {LD->getChain(), LD->getBasePtr(),
                   /* ... */};
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();
  // ...
  SDValue FirstInput = N->getOperand(0);
  // The build vector may be fed by fp-to-int conversions that can be combined
  // into a vector truncation.
  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector,
  // we have P9 vector integer extend instructions.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // On Power10, the Load VSX Vector Rightmost instructions can avoid the
  // need for a swap.
  if (Subtarget.isISA3_1()) {
    // ...
  }

  // ... (the remaining pattern: a v2f64/v2i64 built from two extends of
  //      extracts from the same input vector)
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();
  // ...
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  // ...
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  // ...
  if (FirstElem == 0 && SecondElem == 1)
    // ...
  else if (FirstElem == 2 && SecondElem == 3)
    // ...
  else
    return SDValue();
  // ...
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert(/*...*/ "Need an int -> FP conversion node here");
  // ...
  SDValue Op(N, 0);
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
                     (FirstOperand.getValueType() == MVT::i8 ||
                      FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    // ...
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp =
        Signed ? /* ... FCFID[S] */ : /* ... FCFIDU[S] */;
    // ...
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    // ...
    SDValue ExtOps[] = { Ld, WidthConst };
    // ...
  }

  // ...
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();
  assert(/*...*/ "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? /* ... FCFID[U]S */
                       : /* ... FCFID[U] */;
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }
    // ...
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      // ...
      DCI.AddToWorklist(FP.getNode());
    }
    return FP;
  }
  return SDValue();
}
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // ...
    break;
  }
  // ... (intrinsic cases, elided)
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();
  // ...
  SDValue Load = DAG.getMemIntrinsicNode(/*...*/);
  Chain = Load.getValue(1);
  // ... (follow with an XXSWAPD, elided)

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    // ...
    break;
  }
  // ... (intrinsic cases, elided)
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();
  // ... (precede with an XXSWAPD, elided)
  return DAG.getMemIntrinsicNode(/*...*/, StoreOps, VecTy, MMO);
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  // ...
  assert(/*...*/ "Not a FP_TO_INT Instruction!");

  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  // ...
  // Only perform the combine when it is profitable.
  bool ValidTypeForStoreFltAsInt =
      (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
       (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
    return SDValue();

  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();
  // ...
  DCI.AddToWorklist(Val.getNode());
  // ...
  // Set the number of bytes being converted.
  Val = DAG.getNode(ConvOpcode, /*...*/);
  DCI.AddToWorklist(Val.getNode());
  // ...
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   /*...*/};
  Val = DAG.getMemIntrinsicNode(/*...*/,
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
  DCI.AddToWorklist(Val.getNode());
  return Val;
}
// Is this an alternating shuffle, drawing its elements from the two inputs
// in strict alternation?
  bool PrevElemFromFirstVec = Mask[0] < NumElts;
  for (int i = 1, e = Mask.size(); i < e; i++) {
    if (PrevElemFromFirstVec && Mask[i] < NumElts)
      return false;
    if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
      return false;
    PrevElemFromFirstVec = !PrevElemFromFirstVec;
  }
  return true;

// (isSplatBV: all non-undef inputs of a BUILD_VECTOR are the same value.)
  // Find first non-undef input.
  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
    FirstOp = Op.getOperand(i);
    if (!FirstOp.isUndef())
      break;
  }

  // All inputs are undef or the same as the first non-undef input.
  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
    if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
      return false;
  return true;

// (isScalarToVec: look through a bitcast for a SCALAR_TO_VECTOR.)
  // ...
  Op = Op.getOperand(0);
  // ...
// (fixupShuffleMaskForPermutedSToV) Adjust mask indices that refer to a
// permuted scalar_to_vector input.
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                            int LHSMaxIdx, int RHSMinIdx,
                                            int RHSMaxIdx, int HalfVec,
                                            unsigned ValidLaneWidth,
                                            const PPCSubtarget &Subtarget) {
  for (int i = 0, e = ShuffV.size(); i < e; i++) {
    int Idx = ShuffV[i];
    if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
      ShuffV[i] +=
          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
  }
}

// (getSToVPermuted)
  SDLoc dl(OrigSToV);
  // ...
  assert(/*...*/ "Expecting a SCALAR_TO_VECTOR here");
  // ...
  assert(/*...*/
         "Cannot produce a permuted scalar_to_vector for one element vector");
  // ...
  unsigned ResultInElt = NumElts / 2;
  // ...

SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  // ...
  int NumElts = LHS.getValueType().getVectorNumElements();
  // ...
  if (!Subtarget.hasDirectMove())
    return Res;
  // ...
  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  // ...
  // If either side is a scalar_to_vector, adjust the mask so the shuffle can
  // consume the permuted form.
  if (SToVLHS || SToVRHS) {
    // ...
    if (SToVLHS && SToVRHS &&
        /* ... element counts must match, elided */)
      return Res;
    // ...
    int NumEltsOut = ShuffV.size();
    // A mask element is "valid" only over the lanes actually defined by the
    // narrower scalar.
    unsigned ValidLaneWidth =
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
                      LHS.getValueType().getScalarSizeInBits()
                : SToVRHS.getValueType().getScalarSizeInBits() /
                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
    // ...
    if (SToVLHS) {
      // ...
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      // ...
    }
    if (SToVRHS) {
      // ...
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      // ...
    }

    // ...
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec, ValidLaneWidth, Subtarget);
    // ...
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }
  // ...
  // Adjust the mask so a splatted build_vector operand can feed the shuffle
  // on either endianness.
  if (IsLittleEndian) {
    // ...
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        // ...
        ShuffV[i] = (ShuffV[i - 1] + NumElts);
      }
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        // ...
        ShuffV[i] = (ShuffV[i + 1] + NumElts);
      }
  } else {
    // ...
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        // ...
        ShuffV[i] = ShuffV[i + 1] - NumElts;
      }
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        // ...
        ShuffV[i] = ShuffV[i - 1] - NumElts;
      }
  }
  // ...
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  // ...
  if (IsLittleEndian)
    // ...
}
15183 DAGCombinerInfo &DCI)
const {
15185 "Not a reverse memop pattern!");
15190 auto I =
Mask.rbegin();
15191 auto E =
Mask.rend();
15193 for (;
I !=
E; ++
I) {
15210 if (!Subtarget.hasP9Vector())
15213 if(!IsElementReverse(SVN))
15251 unsigned IntrinsicID =
15252 cast<ConstantSDNode>(Intrin.
getOperand(1))->getZExtValue();
15253 if (IntrinsicID == Intrinsic::ppc_stdcx)
15255 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15257 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15259 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15270 switch (
N->getOpcode()) {
15273 return combineADD(
N, DCI);
15275 return combineSHL(
N, DCI);
15277 return combineSRA(
N, DCI);
15279 return combineSRL(
N, DCI);
15281 return combineMUL(
N, DCI);
15284 return combineFMALike(
N, DCI);
15287 return N->getOperand(0);
15291 return N->getOperand(0);
15297 return N->getOperand(0);
15303 return DAGCombineExtBoolTrunc(
N, DCI);
15305 return combineTRUNCATE(
N, DCI);
15307 if (
SDValue CSCC = combineSetCC(
N, DCI))
15311 return DAGCombineTruncBoolExt(
N, DCI);
15314 return combineFPToIntToFP(
N, DCI);
15317 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(
N->getOperand(0));
15318 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(
N), LSBase, DCI);
15320 return combineVectorShuffle(cast<ShuffleVectorSDNode>(
N), DCI.
DAG);
15323 EVT Op1VT =
N->getOperand(1).getValueType();
15324 unsigned Opcode =
N->getOperand(1).getOpcode();
15327 SDValue Val= combineStoreFPToInt(
N, DCI);
15334 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(
N), DCI);
15340 if (cast<StoreSDNode>(
N)->isUnindexed() && Opcode ==
ISD::BSWAP &&
15341 N->getOperand(1).getNode()->hasOneUse() &&
15343 (Subtarget.hasLDBRX() && Subtarget.
isPPC64() && Op1VT ==
MVT::i64))) {
15347 EVT mVT = cast<StoreSDNode>(
N)->getMemoryVT();
15351 SDValue BSwapOp =
N->getOperand(1).getOperand(0);
15358 if (Op1VT.
bitsGT(mVT)) {
15368 N->getOperand(0), BSwapOp,
N->getOperand(2), DAG.
getValueType(mVT)
15372 Ops, cast<StoreSDNode>(
N)->getMemoryVT(),
15373 cast<StoreSDNode>(
N)->getMemOperand());
15379 isa<ConstantSDNode>(
N->getOperand(1)) && Op1VT ==
MVT::i32) {
15381 EVT MemVT = cast<StoreSDNode>(
N)->getMemoryVT();
15391 cast<StoreSDNode>(
N)->setTruncatingStore(
true);
15408 EVT VT =
LD->getValueType(0);
15427 auto ReplaceTwoFloatLoad = [&]() {
15443 if (!
LD->hasNUsesOfValue(2, 0))
15446 auto UI =
LD->use_begin();
15447 while (UI.getUse().getResNo() != 0) ++UI;
15449 while (UI.getUse().getResNo() != 0) ++UI;
15450 SDNode *RightShift = *UI;
15458 if (RightShift->getOpcode() !=
ISD::SRL ||
15459 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15460 RightShift->getConstantOperandVal(1) != 32 ||
15461 !RightShift->hasOneUse())
15464 SDNode *Trunc2 = *RightShift->use_begin();
15487 if (
LD->isIndexed()) {
15489 "Non-pre-inc AM on PPC?");
15498 LD->getPointerInfo(),
LD->getAlign(),
15499 MMOFlags,
LD->getAAInfo());
15505 LD->getPointerInfo().getWithOffset(4),
15508 if (
LD->isIndexed()) {
15522 if (ReplaceTwoFloatLoad())
15525 EVT MemVT =
LD->getMemoryVT();
15531 !Subtarget.hasP8Vector() &&
15534 LD->getAlign() < ABIAlignment) {
15565 MVT PermCntlTy, PermTy, LDTy;
15566 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15567 : Intrinsic::ppc_altivec_lvsl;
15568 IntrLD = Intrinsic::ppc_altivec_lvx;
15569 IntrPerm = Intrinsic::ppc_altivec_vperm;
15590 SDValue BaseLoadOps[] = { Chain, LDXIntID,
Ptr };
15594 BaseLoadOps, LDTy, BaseMMO);
15603 int IncValue = IncOffset;
15620 SDValue ExtraLoadOps[] = { Chain, LDXIntID,
Ptr };
15624 ExtraLoadOps, LDTy, ExtraMMO);
15635 if (isLittleEndian)
15637 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15640 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15643 Perm = Subtarget.hasAltivec()
15659 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
15661 : Intrinsic::ppc_altivec_lvsl);
15662 if (IID ==
Intr &&
N->getOperand(1)->getOpcode() ==
ISD::ADD) {
15669 .
zext(Add.getScalarValueSizeInBits()))) {
15670 SDNode *BasePtr = Add->getOperand(0).getNode();
15671 for (
SDNode *U : BasePtr->uses()) {
15673 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15683 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15684 SDNode *BasePtr = Add->getOperand(0).getNode();
15685 for (
SDNode *U : BasePtr->uses()) {
15687 isa<ConstantSDNode>(U->getOperand(1)) &&
15688 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15689 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15695 cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15708 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15709 IID == Intrinsic::ppc_altivec_vmaxsh ||
15710 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15726 V2.getOperand(1) == V1) {
15744 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15747 case Intrinsic::ppc_vsx_lxvw4x:
15748 case Intrinsic::ppc_vsx_lxvd2x:
15757 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15760 case Intrinsic::ppc_vsx_stxvw4x:
15761 case Intrinsic::ppc_vsx_stxvd2x:
15770 bool Is64BitBswapOn64BitTgt =
15773 N->getOperand(0).hasOneUse();
15774 if (IsSingleUseNormalLd &&
15776 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15789 Ops,
LD->getMemoryVT(),
LD->getMemOperand());
15810 !IsSingleUseNormalLd)
15815 if (!
LD->isSimple())
15819 LD->getPointerInfo(),
LD->getAlign());
15824 LD->getMemOperand(), 4, 4);
15834 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15843 if (!
N->getOperand(0).hasOneUse() &&
15844 !
N->getOperand(1).hasOneUse() &&
15845 !
N->getOperand(2).hasOneUse()) {
15848 SDNode *VCMPrecNode =
nullptr;
15850 SDNode *LHSN =
N->getOperand(0).getNode();
15854 UI->getOperand(1) ==
N->getOperand(1) &&
15855 UI->getOperand(2) ==
N->getOperand(2) &&
15856 UI->getOperand(0) ==
N->getOperand(0)) {
15869 SDNode *FlagUser =
nullptr;
15871 FlagUser ==
nullptr; ++UI) {
15872 assert(UI != VCMPrecNode->
use_end() &&
"Didn't find user!");
15885 return SDValue(VCMPrecNode, 0);
15907 auto RHSAPInt = cast<ConstantSDNode>(
RHS)->getAPIntValue();
15908 if (!RHSAPInt.isIntN(64))
15911 unsigned Val = RHSAPInt.getZExtValue();
15912 auto isImpossibleCompare = [&]() {
15915 if (Val != 0 && Val != 1) {
15917 return N->getOperand(0);
15920 N->getOperand(0),
N->getOperand(4));
15925 unsigned StoreWidth = 0;
15928 if (
SDValue Impossible = isImpossibleCompare())
15942 auto *MemNode = cast<MemSDNode>(
LHS);
15946 MemNode->getMemoryVT(), MemNode->getMemOperand());
15950 if (
N->getOperand(0) ==
LHS.getValue(1))
15951 InChain =
LHS.getOperand(0);
15969 assert(isDot &&
"Can't compare against a vector result!");
15971 if (
SDValue Impossible = isImpossibleCompare())
15974 bool BranchOnWhenPredTrue = (
CC ==
ISD::SETEQ) ^ (Val == 0);
15986 switch (cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue()) {
16005 N->getOperand(4), CompNode.
getValue(1));
16010 return DAGCombineBuildVector(
N, DCI);
16012 return combineABS(
N, DCI);
16014 return combineVSelect(
N, DCI);
16025 EVT VT =
N->getValueType(0);
16040 Created.push_back(
Op.getNode());
16044 Created.push_back(
Op.getNode());
16056 const APInt &DemandedElts,
16058 unsigned Depth)
const {
16060 switch (
Op.getOpcode()) {
16064 if (cast<VTSDNode>(
Op.getOperand(2))->getVT() ==
MVT::i16)
16065 Known.
Zero = 0xFFFF0000;
16069 switch (cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue()) {
16071 case Intrinsic::ppc_altivec_vcmpbfp_p:
16072 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16073 case Intrinsic::ppc_altivec_vcmpequb_p:
16074 case Intrinsic::ppc_altivec_vcmpequh_p:
16075 case Intrinsic::ppc_altivec_vcmpequw_p:
16076 case Intrinsic::ppc_altivec_vcmpequd_p:
16077 case Intrinsic::ppc_altivec_vcmpequq_p:
16078 case Intrinsic::ppc_altivec_vcmpgefp_p:
16079 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16080 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16081 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16082 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16083 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16084 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16085 case Intrinsic::ppc_altivec_vcmpgtub_p:
16086 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16087 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16088 case Intrinsic::ppc_altivec_vcmpgtud_p:
16089 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16096 switch (cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue()) {
16099 case Intrinsic::ppc_load2r:
16101 Known.
Zero = 0xFFFF0000;
16131 if (
ML->getLoopDepth() > 1 &&
ML->getSubLoops().empty())
16140 for (
auto I =
ML->block_begin(),
IE =
ML->block_end();
I !=
IE; ++
I)
16142 LoopSize +=
TII->getInstSizeInBytes(J);
16147 if (LoopSize > 16 && LoopSize <= 32)
16161 if (Constraint.
size() == 1) {
16162 switch (Constraint[0]) {
16180 }
else if (Constraint ==
"wc") {
16182 }
else if (Constraint ==
"wa" || Constraint ==
"wd" ||
16183 Constraint ==
"wf" || Constraint ==
"ws" ||
16184 Constraint ==
"wi" || Constraint ==
"ww") {
16197 Value *CallOperandVal =
info.CallOperandVal;
16200 if (!CallOperandVal)
16207 else if ((
StringRef(constraint) ==
"wa" ||
16210 type->isVectorTy())
16212 else if (
StringRef(constraint) ==
"wi" &&
type->isIntegerTy(64))
16214 else if (
StringRef(constraint) ==
"ws" &&
type->isDoubleTy())
16216 else if (
StringRef(constraint) ==
"ww" &&
type->isFloatTy())
16219 switch (*constraint) {
16224 if (
type->isIntegerTy())
16228 if (
type->isFloatTy())
16232 if (
type->isDoubleTy())
16236 if (
type->isVectorTy())
16249 std::pair<unsigned, const TargetRegisterClass *>
16253 if (Constraint.
size() == 1) {
16255 switch (Constraint[0]) {
16258 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16259 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16262 return std::make_pair(0U, &PPC::G8RCRegClass);
16263 return std::make_pair(0U, &PPC::GPRCRegClass);
16269 if (Subtarget.hasSPE()) {
16271 return std::make_pair(0U, &PPC::GPRCRegClass);
16273 return std::make_pair(0U, &PPC::SPERCRegClass);
16276 return std::make_pair(0U, &PPC::F4RCRegClass);
16278 return std::make_pair(0U, &PPC::F8RCRegClass);
16282 if (Subtarget.hasAltivec() && VT.
isVector())
16283 return std::make_pair(0U, &PPC::VRRCRegClass);
16284 else if (Subtarget.hasVSX())
16286 return std::make_pair(0U, &PPC::VFRCRegClass);
16289 return std::make_pair(0U, &PPC::CRRCRegClass);
16291 }
else if (Constraint ==
"wc" && Subtarget.useCRBits()) {
16293 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16294 }
else if ((Constraint ==
"wa" || Constraint ==
"wd" ||
16295 Constraint ==
"wf" || Constraint ==
"wi") &&
16296 Subtarget.hasVSX()) {
16300 return std::make_pair(0U, &PPC::VSRCRegClass);
16301 if (VT ==
MVT::f32 && Subtarget.hasP8Vector())
16302 return std::make_pair(0U, &PPC::VSSRCRegClass);
16303 return std::make_pair(0U, &PPC::VSFRCRegClass);
16304 }
else if ((Constraint ==
"ws" || Constraint ==
"ww") && Subtarget.hasVSX()) {
16305 if (VT ==
MVT::f32 && Subtarget.hasP8Vector())
16306 return std::make_pair(0U, &PPC::VSSRCRegClass);
16308 return std::make_pair(0U, &PPC::VSFRCRegClass);
16309 }
else if (Constraint ==
"lr") {
16311 return std::make_pair(0U, &PPC::LR8RCRegClass);
16313 return std::make_pair(0U, &PPC::LRRCRegClass);
16318 if (Constraint[0] ==
'{' && Constraint[Constraint.
size() - 1] ==
'}') {
16322 if (Constraint.
size() > 3 && Constraint[1] ==
'v' && Constraint[2] ==
's') {
16323 int VSNum = atoi(Constraint.
data() + 3);
16324 assert(VSNum >= 0 && VSNum <= 63 &&
16325 "Attempted to access a vsr out of range");
16327 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16328 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16333 if (Constraint.
size() > 3 && Constraint[1] ==
'f') {
16334 int RegNum = atoi(Constraint.
data() + 2);
16335 if (RegNum > 31 || RegNum < 0)
16338 return Subtarget.hasSPE()
16339 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16340 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16342 return Subtarget.hasSPE()
16343 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16344 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16348 std::pair<unsigned, const TargetRegisterClass *> R =
16358 PPC::GPRCRegClass.contains(R.first))
16360 PPC::sub_32, &PPC::G8RCRegClass),
16361 &PPC::G8RCRegClass);
16364 if (!R.second &&
StringRef(
"{cc}").equals_insensitive(Constraint)) {
16365 R.first = PPC::CR0;
16366 R.second = &PPC::CRRCRegClass;
16370 if (Subtarget.
isAIXABI() && !
TM.getAIXExtendedAltivecABI()) {
16371 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16372 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16373 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16374 errs() <<
"warning: vector registers 20 to 32 are reserved in the "
16375 "default AIX AltiVec ABI and cannot be used\n";
16384 std::string &Constraint,
16385 std::vector<SDValue>&Ops,
16390 if (Constraint.length() > 1)
return;
16392 char Letter = Constraint[0];
16412 if (isInt<16>(
Value))
16416 if (isShiftedUInt<16, 16>(
Value))
16420 if (isShiftedInt<16, 16>(
Value))
16424 if (isUInt<16>(
Value))
16440 if (isInt<16>(-
Value))
16448 if (Result.getNode()) {
16449 Ops.push_back(Result);
16460 if (
I.getNumOperands() <= 1)
16462 if (!isa<ConstantSDNode>(Ops[1].getNode()))
16464 auto IntrinsicID = cast<ConstantSDNode>(Ops[1].getNode())->getZExtValue();
16465 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16466 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16469 if (
I.hasMetadata(
"annotation")) {
16470 MDNode *MDN =
I.getMetadata(
"annotation");
16499 switch (AM.
Scale) {
16530 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
16536 bool isPPC64 = Subtarget.
isPPC64();
16555 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16563 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
16576 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16578 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16592 bool isPPC64 = Subtarget.
isPPC64();
16626 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16644 unsigned Intrinsic)
const {
16645 switch (Intrinsic) {
16646 case Intrinsic::ppc_atomicrmw_xchg_i128:
16647 case Intrinsic::ppc_atomicrmw_add_i128:
16648 case Intrinsic::ppc_atomicrmw_sub_i128:
16649 case Intrinsic::ppc_atomicrmw_nand_i128:
16650 case Intrinsic::ppc_atomicrmw_and_i128:
16651 case Intrinsic::ppc_atomicrmw_or_i128:
16652 case Intrinsic::ppc_atomicrmw_xor_i128:
16653 case Intrinsic::ppc_cmpxchg_i128:
16656 Info.ptrVal =
I.getArgOperand(0);
16662 case Intrinsic::ppc_atomic_load_i128:
16665 Info.ptrVal =
I.getArgOperand(0);
16670 case Intrinsic::ppc_atomic_store_i128:
16673 Info.ptrVal =
I.getArgOperand(2);
16678 case Intrinsic::ppc_altivec_lvx:
16679 case Intrinsic::ppc_altivec_lvxl:
16680 case Intrinsic::ppc_altivec_lvebx:
16681 case Intrinsic::ppc_altivec_lvehx:
16682 case Intrinsic::ppc_altivec_lvewx:
16683 case Intrinsic::ppc_vsx_lxvd2x:
16684 case Intrinsic::ppc_vsx_lxvw4x:
16685 case Intrinsic::ppc_vsx_lxvd2x_be:
16686 case Intrinsic::ppc_vsx_lxvw4x_be:
16687 case Intrinsic::ppc_vsx_lxvl:
16688 case Intrinsic::ppc_vsx_lxvll: {
16690 switch (Intrinsic) {
16691 case Intrinsic::ppc_altivec_lvebx:
16694 case Intrinsic::ppc_altivec_lvehx:
16697 case Intrinsic::ppc_altivec_lvewx:
16700 case Intrinsic::ppc_vsx_lxvd2x:
16701 case Intrinsic::ppc_vsx_lxvd2x_be:
16711 Info.ptrVal =
I.getArgOperand(0);
16718 case Intrinsic::ppc_altivec_stvx:
16719 case Intrinsic::ppc_altivec_stvxl:
16720 case Intrinsic::ppc_altivec_stvebx:
16721 case Intrinsic::ppc_altivec_stvehx:
16722 case Intrinsic::ppc_altivec_stvewx:
16723 case Intrinsic::ppc_vsx_stxvd2x:
16724 case Intrinsic::ppc_vsx_stxvw4x:
16725 case Intrinsic::ppc_vsx_stxvd2x_be:
16726 case Intrinsic::ppc_vsx_stxvw4x_be:
16727 case Intrinsic::ppc_vsx_stxvl:
16728 case Intrinsic::ppc_vsx_stxvll: {
16730 switch (Intrinsic) {
16731 case Intrinsic::ppc_altivec_stvebx:
16734 case Intrinsic::ppc_altivec_stvehx:
16737 case Intrinsic::ppc_altivec_stvewx:
16740 case Intrinsic::ppc_vsx_stxvd2x:
16741 case Intrinsic::ppc_vsx_stxvd2x_be:
16751 Info.ptrVal =
I.getArgOperand(1);
16758 case Intrinsic::ppc_stdcx:
16759 case Intrinsic::ppc_stwcx:
16760 case Intrinsic::ppc_sthcx:
16761 case Intrinsic::ppc_stbcx: {
16763 auto Alignment =
Align(8);
16764 switch (Intrinsic) {
16765 case Intrinsic::ppc_stdcx:
16768 case Intrinsic::ppc_stwcx:
16770 Alignment =
Align(4);
16772 case Intrinsic::ppc_sthcx:
16774 Alignment =
Align(2);
16776 case Intrinsic::ppc_stbcx:
16778 Alignment =
Align(1);
16783 Info.ptrVal =
I.getArgOperand(0);
16785 Info.align = Alignment;
16803 if (Subtarget.hasAltivec() &&
Op.size() >= 16 &&
16805 ((
Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16823 return !(BitSize == 0 || BitSize > 64);
16831 return NumBits1 == 64 && NumBits2 == 32;
16839 return NumBits1 == 64 && NumBits2 == 32;
16846 EVT MemVT =
LD->getMemoryVT();
16864 "invalid fpext types");
16872 return isInt<16>(
Imm) || isUInt<16>(
Imm);
16876 return isInt<16>(
Imm) || isUInt<16>(
Imm);
16881 unsigned *Fast)
const {
16895 !Subtarget.allowsUnalignedFPAccess())
16899 if (Subtarget.hasVSX()) {
16922 if (
auto *ConstNode = dyn_cast<ConstantSDNode>(
C.getNode())) {
16923 if (!ConstNode->getAPIntValue().isSignedIntN(64))
16931 int64_t
Imm = ConstNode->getSExtValue();
16932 unsigned Shift = llvm::countr_zero<uint64_t>(
Imm);
16934 if (isInt<16>(
Imm))
16952 if (Subtarget.hasSPE())
16959 return Subtarget.hasP9Vector();
16967 if (!
I->hasOneUse())
16971 assert(
User &&
"A single use instruction with no uses.");
16973 switch (
I->getOpcode()) {
16974 case Instruction::FMul: {
16976 if (
User->getOpcode() != Instruction::FSub &&
16977 User->getOpcode() != Instruction::FAdd)
17023 static const MCPhysReg ScratchRegs[] = {
17024 PPC::X12, PPC::LR8, PPC::CTR8, 0
17027 return ScratchRegs;
17031 const Constant *PersonalityFn)
const {
17032 return Subtarget.
isPPC64() ? PPC::X3 : PPC::R3;
17036 const Constant *PersonalityFn)
const {
17042 EVT VT ,
unsigned DefinedValues)
const {
17044 return Subtarget.hasDirectMove();
17046 if (Subtarget.hasVSX())
17080 bool LegalOps,
bool OptForSize,
17082 unsigned Depth)
const {
17086 unsigned Opc =
Op.getOpcode();
17087 EVT VT =
Op.getValueType();
17112 if (Flags.hasNoSignedZeros() ||
Options.NoSignedZerosFPMath) {
17116 N0Cost,
Depth + 1);
17120 N1Cost,
Depth + 1);
17122 if (NegN0 && N0Cost <= N1Cost) {
17124 return DAG.
getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17125 }
else if (NegN1) {
17127 return DAG.
getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17170 bool ForCodeSize)
const {
17171 if (!VT.
isSimple() || !Subtarget.hasVSX())
17181 if (Subtarget.hasPrefixInstrs()) {
17186 APSInt IntResult(16,
false);
17191 if (IsExact && IntResult <= 15 && IntResult >= -16)
17193 return Imm.isZero();
17196 return Imm.isPosZero();
17208 unsigned Opcode =
N->getOpcode();
17209 unsigned TargetOpcode;
17228 if (
Mask->getZExtValue() == OpSizeInBits - 1)
17234 SDValue PPCTargetLowering::combineSHL(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17240 if (!Subtarget.isISA3_0() || !Subtarget.
isPPC64() ||
17264 SDValue PPCTargetLowering::combineSRA(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17271 SDValue PPCTargetLowering::combineSRL(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17290 auto isZextOfCompareWithConstant = [](
SDValue Op) {
17296 if (Cmp.getOpcode() !=
ISD::SETCC || !Cmp.hasOneUse() ||
17297 Cmp.getOperand(0).getValueType() !=
MVT::i64)
17300 if (
auto *
Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17301 int64_t NegConstant = 0 -
Constant->getSExtValue();
17304 return isInt<16>(NegConstant);
17310 bool LHSHasPattern = isZextOfCompareWithConstant(
LHS);
17311 bool RHSHasPattern = isZextOfCompareWithConstant(
RHS);
17314 if (LHSHasPattern && !RHSHasPattern)
17316 else if (!LHSHasPattern && !RHSHasPattern)
17323 auto *
Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17324 int64_t NegConstant = 0 -
Constant->getSExtValue();
17326 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17337 SDValue AddOrZ = NegConstant != 0 ? Add :
Z;
17352 SDValue AddOrZ = NegConstant != 0 ? Add :
Z;
17389 if (!GSDN || !ConstNode)
17396 if (!isInt<34>(NewOffset))
17409 SDValue PPCTargetLowering::combineADD(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17429 DAGCombinerInfo &DCI)
const {
17431 if (Subtarget.useCRBits()) {
17433 if (
SDValue CRTruncValue = DAGCombineTruncBoolExt(
N, DCI))
17434 return CRTruncValue;
17442 EVT VT =
N->getValueType(0);
17453 DCI.DAG.getTargetConstant(0, dl,
MVT::i32));
17462 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17472 EltToExtract = EltToExtract ? 0 : 1;
17482 return DCI.DAG.getNode(
17484 DCI.DAG.getTargetConstant(EltToExtract, dl,
MVT::i32));
17489 SDValue PPCTargetLowering::combineMUL(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17493 if (!ConstOpOrElement)
17501 auto IsProfitable = [
this](
bool IsNeg,
bool IsAddOne,
EVT VT) ->
bool {
17524 return IsAddOne && IsNeg ? VT.
isVector() :
true;
17528 EVT VT =
N->getValueType(0);
17535 if ((MulAmtAbs - 1).isPowerOf2()) {
17539 if (!IsProfitable(IsNeg,
true, VT))
17552 }
else if ((MulAmtAbs + 1).isPowerOf2()) {
17556 if (!IsProfitable(IsNeg,
false, VT))
17577 DAGCombinerInfo &DCI)
const {
17582 EVT VT =
N->getValueType(0);
17585 unsigned Opc =
N->getOpcode();
17587 bool LegalOps = !DCI.isBeforeLegalizeOps();
17595 if (!
Flags.hasNoSignedZeros() && !
Options.NoSignedZerosFPMath)
17611 bool PPCTargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
17628 if (!Callee ||
Callee->isVarArg())
17641 bool PPCTargetLowering::
17642 isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI)
const {
17647 if (CI->getBitWidth() > 64)
17649 int64_t ConstVal = CI->getZExtValue();
17650 return isUInt<16>(ConstVal) ||
17651 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17663 SDValue PPCTargetLowering::combineABS(
SDNode *
N, DAGCombinerInfo &DCI)
const {
17665 assert(Subtarget.hasP9Altivec() &&
17666 "Only combine this when P9 altivec supported!");
17667 EVT VT =
N->getValueType(0);
17673 if (
N->getOperand(0).getOpcode() ==
ISD::SUB) {
17676 unsigned SubOpcd0 =
N->getOperand(0)->getOperand(0).getOpcode();
17677 unsigned SubOpcd1 =
N->getOperand(0)->getOperand(1).getOpcode();
17683 N->getOperand(0)->getOperand(0),
17684 N->getOperand(0)->getOperand(1),
17689 if (
N->getOperand(0).getValueType() ==
MVT::v4i32 &&
17690 N->getOperand(0).hasOneUse()) {
17692 N->getOperand(0)->getOperand(0),
17693 N->getOperand(0)->getOperand(1),
17707 DAGCombinerInfo &DCI)
const {
17709 assert(Subtarget.hasP9Altivec() &&
17710 "Only combine this when P9 altivec supported!");
17715 SDValue TrueOpnd =
N->getOperand(1);
17716 SDValue FalseOpnd =
N->getOperand(2);
17717 EVT VT =
N->getOperand(1).getValueType();
17757 CmpOpnd1, CmpOpnd2,
17766 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(
unsigned Flags)
const {
17772 if ((Flags & FlagSet) == FlagSet)
17775 if ((Flags & FlagSet) == FlagSet)
17778 if ((Flags & FlagSet) == FlagSet)
17781 if ((Flags & FlagSet) == FlagSet)
17802 if ((FrameIndexAlign % 4) != 0)
17804 if ((FrameIndexAlign % 16) != 0)
17809 if ((FrameIndexAlign % 4) == 0)
17811 if ((FrameIndexAlign % 16) == 0)
17825 if ((
Imm & 0
x3) == 0)
17827 if ((
Imm & 0xf) == 0)
17833 const APInt &ConstImm = CN->getAPIntValue();
17852 const APInt &ConstImm = CN->getAPIntValue();
17863 !cast<ConstantSDNode>(
RHS.getOperand(1))->getZExtValue())
17875 isValidPCRelNode<ConstantPoolSDNode>(
N) ||
17876 isValidPCRelNode<GlobalAddressSDNode>(
N) ||
17877 isValidPCRelNode<JumpTableSDNode>(
N) ||
17878 isValidPCRelNode<BlockAddressSDNode>(
N));
17883 unsigned PPCTargetLowering::computeMOFlags(
const SDNode *Parent,
SDValue N,
17888 if (!Subtarget.hasP9Vector())
17892 if (Subtarget.hasPrefixInstrs())
17895 if (Subtarget.hasSPE())
17904 unsigned ParentOp = Parent->
getOpcode();
17907 unsigned ID = cast<ConstantSDNode>(Parent->
getOperand(1))->getZExtValue();
17908 if ((
ID == Intrinsic::ppc_vsx_lxvp) || (
ID == Intrinsic::ppc_vsx_stxvp)) {
17909 SDValue IntrinOp = (
ID == Intrinsic::ppc_vsx_lxvp)
17920 if (
const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
17921 if (LSB->isIndexed())
17926 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
17927 assert(MN &&
"Parent should be a MemSDNode!");
17932 "Not expecting scalar integers larger than 16 bytes!");
17935 else if (Size == 32)
17942 else if (Size == 256) {
17943 assert(Subtarget.pairedVectorMemops() &&
17944 "256-bit vectors are only available when paired vector memops is "
17950 if (Size == 32 || Size == 64)
17962 if (
const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
17988 bool IsNonP1034BitConst =
17992 IsNonP1034BitConst)
18005 int16_t ForceXFormImm = 0;
18008 Disp =
N.getOperand(0);
18009 Base =
N.getOperand(1);
18020 !
N.getOperand(1).hasOneUse() || !
N.getOperand(0).hasOneUse())) {
18021 Disp =
N.getOperand(0);
18022 Base =
N.getOperand(1);
18036 unsigned NumParts,
MVT PartVT, std::optional<CallingConv::ID>
CC)
const {
18052 SDValue PPCTargetLowering::lowerToLibCall(
const char *LibCallName,
SDValue Op,
18056 EVT RetVT =
Op.getValueType();
18064 EVT ArgVT =
N.getValueType();
18069 Entry.IsZExt = !Entry.IsSExt;
18070 Args.push_back(Entry);
18078 (RetTy ==
F.getReturnType() ||
F.getReturnType()->isVoidTy());
18084 .setTailCall(isTailCall)
18091 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18092 const char *LibCallFloatName,
const char *LibCallDoubleName,
SDValue Op,
18095 return lowerToLibCall(LibCallFloatName,
Op, DAG);
18098 return lowerToLibCall(LibCallDoubleName,
Op, DAG);
18103 bool PPCTargetLowering::isLowringToMASSFiniteSafe(
SDValue Op)
const {
18105 return isLowringToMASSSafe(
Op) &&
Flags.hasNoSignedZeros() &&
18109 bool PPCTargetLowering::isLowringToMASSSafe(
SDValue Op)
const {
18110 return Op.getNode()->getFlags().hasApproximateFuncs();
18113 bool PPCTargetLowering::isScalarMASSConversionEnabled()
const {
18117 SDValue PPCTargetLowering::lowerLibCallBase(
const char *LibCallDoubleName,
18118 const char *LibCallFloatName,
18119 const char *LibCallDoubleNameFinite,
18120 const char *LibCallFloatNameFinite,
18123 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(
Op))
18126 if (!isLowringToMASSFiniteSafe(
Op))
18127 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName,
Op,
18130 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18131 LibCallDoubleNameFinite,
Op, DAG);
18135 return lowerLibCallBase(
"__xl_pow",
"__xl_powf",
"__xl_pow_finite",
18136 "__xl_powf_finite",
Op, DAG);
18140 return lowerLibCallBase(
"__xl_sin",
"__xl_sinf",
"__xl_sin_finite",
18141 "__xl_sinf_finite",
Op, DAG);
18145 return lowerLibCallBase(
"__xl_cos",
"__xl_cosf",
"__xl_cos_finite",
18146 "__xl_cosf_finite",
Op, DAG);
18150 return lowerLibCallBase(
"__xl_log",
"__xl_logf",
"__xl_log_finite",
18151 "__xl_logf_finite",
Op, DAG);
18155 return lowerLibCallBase(
"__xl_log10",
"__xl_log10f",
"__xl_log10_finite",
18156 "__xl_log10f_finite",
Op, DAG);
18160 return lowerLibCallBase(
"__xl_exp",
"__xl_expf",
"__xl_exp_finite",
18161 "__xl_expf_finite",
Op, DAG);
18168 if (!isa<FrameIndexSDNode>(
N))
18186 unsigned Flags = computeMOFlags(Parent,
N, DAG);
18198 "Must be using PC-Relative calls when a valid PC-Relative node is "
18214 int16_t
Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
18228 Disp =
N.getOperand(1).getOperand(0);
18233 Base =
N.getOperand(0);
18240 auto *CN = cast<ConstantSDNode>(
N);
18241 EVT CNType = CN->getValueType(0);
18253 if ((CNType ==
MVT::i32 || isInt<32>(CNImm)) &&
18255 int32_t
Addr = (int32_t)CNImm;
18276 unsigned Opcode =
N.getOpcode();
18284 Base =
N.getOperand(0);
18303 Base = FI ?
N :
N.getOperand(1);
18315 bool IsVarArg)
const {
18327 return Subtarget.
isPPC64() &&
18329 Subtarget.hasQuadwordAtomics();
18363 return Intrinsic::ppc_atomicrmw_xchg_i128;
18365 return Intrinsic::ppc_atomicrmw_add_i128;
18367 return Intrinsic::ppc_atomicrmw_sub_i128;
18369 return Intrinsic::ppc_atomicrmw_and_i128;
18371 return Intrinsic::ppc_atomicrmw_or_i128;
18373 return Intrinsic::ppc_atomicrmw_xor_i128;
18375 return Intrinsic::ppc_atomicrmw_nand_i128;
18389 Value *IncrLo =
Builder.CreateTrunc(Incr, Int64Ty,
"incr_lo");
18391 Builder.CreateTrunc(
Builder.CreateLShr(Incr, 64), Int64Ty,
"incr_hi");
18395 Value *Lo =
Builder.CreateExtractValue(LoHi, 0,
"lo");
18396 Value *Hi =
Builder.CreateExtractValue(LoHi, 1,
"hi");
18397 Lo =
Builder.CreateZExt(Lo, ValTy,
"lo64");
18398 Hi =
Builder.CreateZExt(Hi, ValTy,
"hi64");
18413 Value *CmpLo =
Builder.CreateTrunc(CmpVal, Int64Ty,
"cmp_lo");
18415 Builder.CreateTrunc(
Builder.CreateLShr(CmpVal, 64), Int64Ty,
"cmp_hi");
18416 Value *NewLo =
Builder.CreateTrunc(NewVal, Int64Ty,
"new_lo");
18418 Builder.CreateTrunc(
Builder.CreateLShr(NewVal, 64), Int64Ty,
"new_hi");
18423 Builder.CreateCall(IntCmpXchg, {
Addr, CmpLo, CmpHi, NewLo, NewHi});
18425 Value *Lo =
Builder.CreateExtractValue(LoHi, 0,
"lo");
18426 Value *Hi =
Builder.CreateExtractValue(LoHi, 1,
"hi");
18427 Lo =
Builder.CreateZExt(Lo, ValTy,
"lo64");
18428 Hi =
Builder.CreateZExt(Hi, ValTy,
"hi64");